In [1]:
import blocksci
import matplotlib.pyplot as plt
import matplotlib.ticker
import collections
import pandas as pd
import numpy as np
from pathlib import Path
%matplotlib inline

# Filesystem locations used throughout the notebook.
# NOTE(review): these are absolute, machine-specific paths; adjust before running elsewhere.
# Despite its name, `parser_data_directory` points at a config.json file.
parser_data_directory = Path("/mnt/anal/config.json")
cluster_directory = Path("/mnt/anal/cluster/")
dumplings_directory = Path("/mnt/dumplings/")

# Open the chain from the path above.
chain = blocksci.Blockchain(str(parser_data_directory))

from typing import Tuple

def get_block_height_for_date(date: str) -> int:
    """Return the height of the first block in ``chain.range(date)``."""
    first_block = chain.range(date)[0]
    return first_block.height

def get_block_height_range(start: str, end: str) -> Tuple[int, int]:
    """Return ``(height_for_start, height_for_end)`` for two date strings."""
    start_height = get_block_height_for_date(start)
    end_height = get_block_height_for_date(end)
    return start_height, end_height

In [5]:
import json

# Dumplings event exports, one file per coinjoin implementation.
# The matching *_txs.json paths and loads are kept below but commented out.
wasabi2_events_file = dumplings_directory / "wasabi2_events.json"
# wasabi2_txs_file = dumplings_directory / "wasabi2_txs.json"
wasabi_events_file = dumplings_directory / "wasabi1_events.json"
# wasabi_txs_file = dumplings_directory / "wasabi_txs.json"
whirlpool_events_file = dumplings_directory / "whirlpool_events.json"
# whirlpool_txs_file = dumplings_directory / "whirlpool_txs.json"

with wasabi2_events_file.open() as events_fh:
    wasabi2_events = json.load(events_fh)

# with open(wasabi2_txs_file) as f:
#     wasabi2_txs = json.load(f)

with wasabi_events_file.open() as events_fh:
    wasabi_events = json.load(events_fh)

# with open(wasabi_txs_file) as f:
#     wasabi_txs = json.load(f)

with whirlpool_events_file.open() as events_fh:
    whirlpool_events = json.load(events_fh)

# with open(whirlpool_txs_file) as f:
#     whirlpool_txs = json.load(f)

In [35]:
from datetime import datetime

# Transactions from `consolidations` with a block time after 2024-03-01,
# ordered by input value, largest first.
from_top = sorted(
    (
        candidate
        for candidate in map(chain.tx_with_hash, set(consolidations))
        if candidate.block_time > datetime(2024, 3, 1)
    ),
    key=lambda candidate: -candidate.input_value,
)


print("57a8ea3ba1568fed4d9f7d7b3b84cdec552d9c49d4849bebf77a1053c180d0d1" in consolidations)
print("d463b35b3d18dda4e59f432728c7a365eaefd50b24a6596ab42a077868e9d7e5" in consolidations)
print("8f59577b2dfa88e7d7fdd206a17618893db7559007a15658872b665bc16417c5" in consolidations)

# Slice instead of indexing range(10): with fewer than ten matches the
# original raised IndexError, now the loop simply prints what is available.
for top_tx in from_top[:10]:
    print(f"https://mempool.space/tx/{top_tx.hash}")
    print(top_tx.input_value / 100000000, "BTC")

True
True
True
https://mempool.space/tx/57a8ea3ba1568fed4d9f7d7b3b84cdec552d9c49d4849bebf77a1053c180d0d1
76.3299438 BTC
https://mempool.space/tx/32cad2742cc904e74769f637cb63e8c94dc5dbf0f4d0c18b6cad679465c458a8
67.0 BTC
https://mempool.space/tx/d463b35b3d18dda4e59f432728c7a365eaefd50b24a6596ab42a077868e9d7e5
62.38784013 BTC
https://mempool.space/tx/8f59577b2dfa88e7d7fdd206a17618893db7559007a15658872b665bc16417c5
62.21724419 BTC
https://mempool.space/tx/e7c672f95690a3495fe50d1688311b4908e913790ec8f08dd6b76a3f401a3982
44.98111892 BTC
https://mempool.space/tx/d2183b4d4838f0a767f8c82a449e735d344fdc1fb27109a0a3a0c9317f90c1d6
36.24910764 BTC
https://mempool.space/tx/c9d870f7b73b1f10d217d625208968c2690df864c7e39e69746ad538cd25041c
33.054 BTC
https://mempool.space/tx/aaff209fcdef69736ac59bc6d82637cf5c8d1f24f31561398b424284203de199
29.9 BTC
https://mempool.space/tx/217240acda764b244aa4878f9f3c7e0934997fef0c9c82b00a787edccbd550e9
29.525 BTC
https://mempool.space/tx/deccc09df7f3549300b13b36d4b26f1

In [3]:
# Maps str(smallest input value) -> [first tx seen with that value, number of later txs with it].
first_in_pool_denominations = {}

tx: blocksci.Tx

for tx in filtered_whilrpool_coinjoins:
    pool_size = str(min(inp.value for inp in tx.inputs))
    entry = first_in_pool_denominations.get(pool_size)
    if entry is None:
        # First transaction for this denomination: remember it, counter starts at 0.
        first_in_pool_denominations[pool_size] = [tx, 0]
    else:
        entry[1] += 1

CPU times: user 1min 32s, sys: 188 ms, total: 1min 32s
Wall time: 645 ms


In [4]:
%time filtered_ww1_events = chain.filter_coinjoin_txes(0, len(chain), "ww2")

CPU times: user 29 s, sys: 2h 13min 50s, total: 2h 14min 19s
Wall time: 35.9 s


In [7]:
# Compare the transactions found by BlockSci with the Dumplings export, per implementation.
for blocksci_txes, name, dumplings_events in ((filtered_ww1_events, "wasabi1", wasabi_events),):
    blocksci_hashes = {str(found.hash) for found in blocksci_txes}
    res = {'only_in_dumplings': [], 'only_in_blocksci': [], 'in_both': []}

    # Every BlockSci tx is either also known to Dumplings or BlockSci-only.
    for found in blocksci_txes:
        bucket = 'in_both' if str(found.hash) in dumplings_events else 'only_in_blocksci'
        res[bucket].append(found)

    # Dumplings ids that BlockSci did not report.
    res['only_in_dumplings'].extend(
        txid for txid in dumplings_events.keys() if txid not in blocksci_hashes
    )

    print(f"\nFor {name}:\nOnly in dumplings: {len(res['only_in_dumplings'])}, only in blocksci: {len(res['only_in_blocksci'])}, in both: {len(res['in_both'])}")


For wasabi1:
Only in dumplings: 0, only in blocksci: 107, in both: 34955


In [8]:
%time result = chain.get_address_types(get_block_height_for_date('2023-01-01'), len(chain))

print(type(result), len(result))

for k, v in sorted(result.items(), key=lambda x: x[1], reverse=True):
    print(f"{k}: {v}")

CPU times: user 1min 11s, sys: 60 ms, total: 1min 11s
Wall time: 547 ms
<class 'dict'> 9
Pay to witness pubkey hash: 272536756
Pay to witness unknown: 197219154
Pay to script hash: 103787612
Pay to pubkey hash: 85680424
Null data: 42567683
Pay to witness script hash: 15242387
Multisig: 1256112
Nonstandard: 11065
Pay to pubkey: 289


# Basic analysis
Here are some basic analyses for the coinjoins just to make sure nothing fishy is happening

In [3]:
def find_min_max(tx: blocksci.Tx) -> Tuple[int, int]:
    """Return the transaction's ``(input_count, output_count)`` pair."""
    n_inputs = tx.input_count
    n_outputs = tx.output_count
    return n_inputs, n_outputs

def find_number_of_unique_txs(tx: blocksci.Tx) -> Tuple[int, int]:
    """Return how many distinct values occur among the inputs and among the outputs."""
    distinct_input_values = {inp.value for inp in tx.inputs}
    distinct_output_values = {out.value for out in tx.outputs}
    return len(distinct_input_values), len(distinct_output_values)
    
def find_timestamped_txes(tx: blocksci.Tx) -> bool:
    """Return True when both ``time_seen`` and ``timestamp_seen`` are set on ``tx``."""
    if tx.time_seen is None:
        return False
    return tx.timestamp_seen is not None

In [None]:
# Run find_number_of_unique_txs over every txid in the Wasabi 2 events, using 64 workers.
unique_counts = chain.map_spliterator(
    map_func=find_number_of_unique_txs,
    keys=list(wasabi2_events.keys()),
    data_directory=str(parser_data_directory),
    workers=64,
)

In [None]:
# Run find_timestamped_txes over the hash of every transaction in `chain.blocks.txes`.
%time timestamped = chain.map_spliterator(map_func=find_timestamped_txes, keys=list(map(lambda t: str(t.hash), chain.blocks.txes)), data_directory=str(parser_data_directory), workers=64)

In [None]:
from statistics import median

# `unique_counts` is a list of per-worker lists of (inputs, outputs) pairs; flatten each side.
flattened_unique_inputs = [pair[0] for chunk in unique_counts for pair in chunk]
flattened_unique_outputs = [pair[1] for chunk in unique_counts for pair in chunk]

for heading, counts in (
    ("inputs:", flattened_unique_inputs),
    ("outputs:", flattened_unique_outputs),
):
    print(heading)
    print(f"min: {min(counts)}, max: {max(counts)}, median: {median(counts)}")

In [None]:
# Collect (input_count, output_count) for every txid in the Wasabi 2 events, using 64 workers.
minmaxes = chain.map_spliterator(
    map_func=find_min_max,
    keys=list(wasabi2_events.keys()),
    data_directory=str(parser_data_directory),
    workers=64,
)

In [None]:
# Largest input count and largest output count over all worker chunks.
# Seeding from `minmaxes[0][0]` raised IndexError whenever the first chunk was
# empty; flattening first and using `default=0` handles empty chunks.
all_counts = [pair for chunk in minmaxes for pair in chunk]
inp = max((input_count for input_count, _ in all_counts), default=0)
out = max((output_count for _, output_count in all_counts), default=0)

print(inp, out)
        

# Remix analysis

Here we have the map functions for different remix analyses. The functions should have the following interface
`map_func(tx: blocksci.Tx, **kwargs) -> T`
where `T` is the common result type. The result will be added to the list of results of each worker.

`kwargs` are the arguments passed to each worker.

In [None]:
def find_remixes_within_one_hop(tx: blocksci.Tx, **kwargs):
    """Collect the outputs of ``tx`` that are spent by a transaction listed in ``events``.

    Keyword args:
        events: container of transaction-hash strings, passed through ``kwargs``.

    Returns:
        ``(tx.hash, tx.output_count, hits)`` where ``hits`` holds one
        ``(output index, True)`` entry per spent output whose spending
        transaction hash is in ``events``.
    """
    cj_events = kwargs['events']
    remixed = [
        (out.index, True)
        for out in tx.outputs
        if out.is_spent and str(out.spending_tx.hash) in cj_events
    ]
    return (tx.hash, tx.output_count, remixed)
        

### Results processing

We take the outputs of the above functions and compute various statistics.

In [None]:
# One-hop remix search per implementation (Whirlpool uses 128 workers, the others 64).
# NOTE(review): `wasabi2_txs`, `wasabi_txs` and `whirlpool_txs` are only assigned by the
# commented-out loads in the data-loading cell, so this cell raises NameError on a fresh kernel.
wasabi2_results = chain.map_spliterator(map_func=find_remixes_within_one_hop, keys=list(wasabi2_txs["coinjoins"].keys()), data_directory=str(parser_data_directory), workers=64, events=wasabi2_events.keys())
wasabi_results = chain.map_spliterator(map_func=find_remixes_within_one_hop, keys=list(wasabi_txs["coinjoins"].keys()), data_directory=str(parser_data_directory), workers=64, events=wasabi_events.keys())
whirlpool_results = chain.map_spliterator(map_func=find_remixes_within_one_hop, keys=list(whirlpool_txs["coinjoins"].keys()), data_directory=str(parser_data_directory), workers=128, events=whirlpool_events.keys())

In [None]:
def compute_remix_stats(results, events, key):
    """Print computed remix counts next to the remix count recorded in ``events``.

    Args:
        results: iterable of per-worker lists of ``(txid, output_count, hits)`` tuples.
        events: mapping whose values carry an ``"outputs"`` mapping; each output
            has a ``"mix_event_type"`` entry.
        key: heading printed before the statistics.
    """
    remixed = 0
    left = 0
    for worker_chunk in results:
        for _txid, output_count, hits in worker_chunk:
            remixed += len(hits)
            left += output_count - len(hits)
    stats_computed = {"remix": remixed, "left": left}

    print(key)
    print(stats_computed)

    # Count the outputs flagged as remixes in the events data.
    dumplings_stats = {
        "dumplings_remix": sum(
            1
            for event_tx in events.values()
            for event_out in event_tx["outputs"].values()
            if event_out["mix_event_type"] == "MIX_REMIX"
        )
    }

    print(dumplings_stats)
    print(f"dumplings - computed: {dumplings_stats['dumplings_remix'] - stats_computed['remix']}\n")

# NOTE(review): the `*_txs` variables are only assigned by the commented-out loads in the
# data-loading cell; on a fresh kernel these calls raise NameError.
compute_remix_stats(wasabi2_results, wasabi2_txs["coinjoins"], "wasabi2 1 hop")
compute_remix_stats(wasabi_results, wasabi_txs["coinjoins"], "wasabi 1 hop")
compute_remix_stats(whirlpool_results, whirlpool_txs["coinjoins"], "whirlpool 1 hop")

# Consolidation analysis

The functions for consolidation analysis are here. The interface is still the same:
`map_func(tx: blocksci.Tx, **kwargs) -> list[T]`

In [None]:
from typing import List, Dict, Tuple

def find_outputs_one_hop(tx: blocksci.Tx, found: Dict[str, int]):
    """Count, per spending transaction, how many outputs of ``tx`` it spends.

    Args:
        tx: transaction whose outputs are inspected.
        found: counter updated in place; maps the spending transaction's hash
            string to the number of ``tx`` outputs it spends. (The values are
            ints, so the annotation is ``Dict[str, int]``.)
    """
    for output in tx.outputs:
        if not output.is_spent:
            continue

        output_spent_in = str(output.spending_tx.hash)
        found[output_spent_in] = found.get(output_spent_in, 0) + 1



def find_consolidation(tx: blocksci.Tx, **kwargs) -> Tuple[str, Dict[str, int]]:
    """Return ``(hash of tx, one-hop spending counts)``.

    The second element is the counter filled by ``find_outputs_one_hop``.
    ``kwargs`` is accepted but not used. The return annotation now describes
    the single tuple that is actually returned.
    """
    found_for_tx: Dict[str, int] = {}
    find_outputs_one_hop(tx, found_for_tx)
    return str(tx.hash), found_for_tx


def has_tx_output_in_coinjoin_events(tx: blocksci.Tx, **kwargs) -> bool:
    """Return True if any spent output of ``tx`` is spent by a transaction in ``events``.

    Keyword args:
        events: container of transaction-hash strings, passed through ``kwargs``.
    """
    cj_events = kwargs["events"]
    return any(
        output.is_spent and str(output.spending_tx.hash) in cj_events
        for output in tx.outputs
    )


def find_outputs_two_hops(tx: blocksci.Tx, found: Dict[str, int]):
    """Count transactions with fewer than two outputs reachable within two hops of ``tx``.

    For every spent output of ``tx``: if its spending transaction has fewer
    than two outputs, that transaction is counted. Otherwise every spent
    output of the spending transaction is checked the same way, one hop further.

    Args:
        tx: starting transaction.
        found: counter updated in place; maps a transaction hash string to the
            number of times it was reached. (Values are ints, so the
            annotation is ``Dict[str, int]``.)
    """
    def count_hit(hit_tx) -> None:
        # Single place for the counter update that used to be duplicated.
        hit_hash = str(hit_tx.hash)
        found[hit_hash] = found.get(hit_hash, 0) + 1

    for output in tx.outputs:
        if not output.is_spent:
            continue

        first_hop = output.spending_tx
        if first_hop.output_count < 2:
            count_hit(first_hop)
            continue

        for output2 in first_hop.outputs:
            if output2.is_spent and output2.spending_tx.output_count < 2:
                count_hit(output2.spending_tx)


def find_outputs_three_hops(tx: blocksci.Tx, found: Dict[str, int]):
    """Count the first transaction with fewer than two outputs within three hops of each output.

    For every spent output of ``tx`` the spending chain is followed
    depth-first, in output order, for at most three hops. The first
    transaction on the way that has fewer than two outputs is counted, and the
    search for that output stops. So each output of ``tx`` contributes at most
    one count.

    Args:
        tx: starting transaction.
        found: counter updated in place; maps a transaction hash string to the
            number of times it was reached. (Values are ints, so the
            annotation is ``Dict[str, int]``.)
    """
    def first_hit(spender, hops_left):
        # Hash of the first transaction with < 2 outputs at or below `spender`, else None.
        if spender.output_count < 2:
            return str(spender.hash)
        if hops_left == 0:
            return None
        for out in spender.outputs:
            if not out.is_spent:
                continue
            hit = first_hit(out.spending_tx, hops_left - 1)
            if hit is not None:
                return hit
        return None

    for output in tx.outputs:
        if not output.is_spent:
            continue

        # The spending tx is hop one; two further hops remain below it.
        hit = first_hit(output.spending_tx, 2)
        if hit is not None:
            found[hit] = found.get(hit, 0) + 1
                    
def find_consolidation_three_hops(tx: blocksci.Tx):
    """Return ``(hash of tx, counter filled by find_outputs_three_hops)``."""
    hits = {}
    find_outputs_three_hops(tx, hits)
    tx_hash = str(tx.hash)
    return (tx_hash, hits)


def find_consolidation_two_hops(tx: blocksci.Tx):
    """Return ``(hash of tx, counter filled by find_outputs_two_hops)``."""
    hits = {}
    find_outputs_two_hops(tx, hits)
    tx_hash = str(tx.hash)
    return (tx_hash, hits)





In [None]:
# Three-hop consolidation search over every txid in the Wasabi 2 events (64 workers).
# The equivalent run over `wasabi_events` is kept below but commented out.
%time wasabi2_consolidation_three_hop = chain.map_spliterator(map_func=find_consolidation_three_hops, keys=list(wasabi2_events.keys()), data_directory=str(parser_data_directory), workers=64)
# %time wasabi_consolidation_three_hop = chain.map_spliterator(map_func=find_consolidation_three_hops, keys=list(wasabi_events.keys()), data_directory=str(parser_data_directory), workers=64)


In [None]:
# Two-hop consolidation search over every txid in `wasabi_events` (64 workers).
%time wasabi_consolidation_two_hop = chain.map_spliterator(map_func=find_consolidation_two_hops, keys=list(wasabi_events.keys()), data_directory=str(parser_data_directory), workers=64)

In [None]:
# Two-hop consolidation search over every txid in `whirlpool_events` (128 workers).
%time whirlpool_consolidation_two_hop = chain.map_spliterator(map_func=find_consolidation_two_hops, keys=list(whirlpool_events.keys()), data_directory=str(parser_data_directory), workers=128)


### Consolidation analyses

Following are the functions computing the actual result analyses.

In [None]:
from collections import defaultdict


def compute_consolidated_txes_in_one_hop(consolidated):
    """Build a histogram of per-transaction counts, ignoring known coinjoins.

    Args:
        consolidated: iterable of ``(root_tx, {outbound_tx: count})`` pairs.

    Returns:
        ``defaultdict(int)`` mapping each ``count`` to the number of outbound
        transactions that had it. Outbound transactions present in
        ``wasabi2_events``, ``wasabi_events`` or ``whirlpool_events`` are skipped.
    """
    histogram = defaultdict(int)
    for _root_tx, outbound_counts in consolidated:
        for outbound_tx, count in outbound_counts.items():
            is_known_coinjoin = (
                outbound_tx in wasabi2_events
                or outbound_tx in wasabi_events
                or outbound_tx in whirlpool_events
            )
            if not is_known_coinjoin:
                histogram[count] += 1
    return histogram


# w2_consolidation_1hop = compute_consolidated_txes_in_one_hop(wasabi2_consolidation)
# w_consolidation_1hop = compute_consolidated_txes_in_one_hop(wasabi_consolidation)
# whirl_consolidation_1hop = compute_consolidated_txes_in_one_hop(whirlpool_consolidation)

def plot_barplot_from_dictionary(dct):
    """Show a bar chart with one bar per dictionary entry, labelled by key."""
    positions = range(len(dct))
    heights = list(dct.values())
    tick_labels = list(dct.keys())
    plt.bar(positions, heights, align='center')
    plt.xticks(positions, tick_labels)
    plt.show()



def make_graph(data, name):
    """Show three stacked bar charts of ``data`` for the keys 2-8, 9-15 and 16-22.

    Args:
        data: mapping indexed by the integer keys 2..22.
        name: prefix used in each subplot title.
    """
    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(8, 9))

    # One panel per key range; bounds are inclusive.
    panels = ((2, 8), (9, 15), (16, 22))
    for ax, (low, high) in zip(axes, panels):
        keys = list(range(low, high + 1))
        ax.bar(keys, [data[key] for key in keys])
        ax.set_xlabel('X-axis')
        ax.set_ylabel(f'Y-axis ({low}-{high})')
        ax.set_title(f'{name}: Bar Plot ({low}-{high})')

    # Leave vertical room between the panels, then render.
    plt.subplots_adjust(hspace=0.5)
    plt.show()

# print(f"w2 unpaired: {w2_consolidation_1hop[1]}")
# print(f"w unpaired: {w_consolidation_1hop[1]}")
# print(f"whirlpool unpaired: {whirl_consolidation_1hop[1]}")

# make_graph(w2_consolidation_1hop, "wasabi2")
# make_graph(w_consolidation_1hop, "wasabi")
# make_graph(whirl_consolidation_1hop, "whirlpool")



# w2_2hop_consolidated = compute_consolidated_txes_in_one_hop(wasabi2_consolidation_two_hop)
# w_2hop_consolidated = compute_consolidated_txes_in_one_hop(wasabi_consolidation_two_hop)
# wh_2hop_consolidated = compute_consolidated_txes_in_one_hop(whirlpool_consolidation_two_hop)


# make_graph(w2_2hop_consolidated, "two hop consolidation to one output wasabi2")
# make_graph(w_2hop_consolidated, "two hop consolidation to one output wasabi")
# make_graph(wh_2hop_consolidated, "two hop consolidation to one output whirlpool")

# NOTE(review): `res_w2` is assigned by a later cell (chain.find_consolidation_3_hops) and
# `res_w1` only by a commented-out line there, so this cell fails on a top-to-bottom run.
print(len(res_w2))
w2_consolidation_3hop = compute_consolidated_txes_in_one_hop(res_w2)
w_consolidation_3hop = compute_consolidated_txes_in_one_hop(res_w1)


# Histogram bucket 1: entries whose count is exactly one.
print(f"w2 unpaired: {w2_consolidation_3hop[1]}")
print(f"w unpaired: {w_consolidation_3hop[1]}")


make_graph(w2_consolidation_3hop, "wasabi2")
make_graph(w_consolidation_3hop, "wasabi")

In [None]:
# Look up the two example transactions directly by hash.
in_tx = chain.tx_with_hash('0002eb55bd780c4fc10f212bb686a0a9426ef11d611827605ba8f82db30bcbbc')
consolidated_tx = chain.tx_with_hash('0abebd6704fcd886b1e74815ce05a24a11aa2d0e543729d6dbd18629c72874a7')

print(consolidated_tx.output_count)


from typing import Optional, Set

# Second element of the (hash, counter) pair returned by the two-hop search.
consolidated_txs_lost = find_consolidation_two_hops(in_tx)[1]

# print(consolidated_txs)
# Keep only the transactions reached fewer than ten times.
consolidated_txs = [
    tx_hash for tx_hash, hit_count in consolidated_txs_lost.items() if hit_count < 10
]
print(len(consolidated_txs), consolidated_txs_lost[consolidated_txs[0]])

def subset_sum_rec(nums: List[int], total: int, start: int, memo: Dict[Tuple[int, int], Optional[Set[int]]]) -> Optional[Set[int]]:
    """Find a subset of ``nums[start:]`` whose ``.value`` attributes sum to ``total``.

    Args:
        nums: items exposing a ``.value`` attribute. The early exit below
            assumes they are sorted by ascending value.
        total: remaining target sum.
        start: index of the first item that may still be used.
        memo: cache keyed by ``(start, total)``, updated in place.

    Returns:
        A set of the chosen items, or ``None`` when no subset matches.
    """
    if total == 0:
        return set()

    if start == len(nums):
        return None

    key = (start, total)
    if key in memo:
        return memo[key]

    num = nums[start]
    if num.value > total:
        # With ascending input every later item is too large as well.
        memo[key] = None
        return None

    with_current = subset_sum_rec(nums, total - num.value, start + 1, memo)
    if with_current is not None:
        # Build a new set: `with_current` may already be stored in `memo` under
        # another key, and adding to it in place would corrupt that entry.
        result = with_current | {num}
    else:
        result = subset_sum_rec(nums, total, start + 1, memo)

    memo[key] = result
    return result

def find_sum_candidates(tx, inputs, output_value):
    """Search ``inputs`` for a subset whose values add up to ``output_value``.

    Inputs larger than ``output_value`` are dropped, the rest are sorted by
    ascending value and handed to ``subset_sum_rec`` with a fresh memo.
    ``tx`` is accepted but not used.
    """
    affordable = [candidate for candidate in inputs if candidate.value <= output_value]
    affordable.sort(key=lambda candidate: candidate.value)
    return subset_sum_rec(affordable, output_value, 0, {})

# Inputs of `in_tx` already matched to an earlier consolidation output.
a = set()
# Slice instead of range(3) so fewer than three candidates cannot raise IndexError.
for consolidated_hash in consolidated_txs[:3]:
    inputs = set(in_tx.inputs) - a
    value = chain.tx_with_hash(consolidated_hash).outputs[0].value
    r = find_sum_candidates(in_tx, inputs, value)
    if r:
        # Accumulate the match. The original `r = a` discarded it, so `a`
        # stayed empty and no input was ever excluded from later rounds.
        a |= r

print(a)


# embrace VUT

Let's try to use some analyses with [coinomon](https://coinomon.bazar.nesad.fit.vutbr.cz/#/Authentication/login).

- get all coinjoins in one month (say Feb 23)
- get all output txs from them (with one output)
- pick one randomly and get some data about them from coinomon
- ???
- profit

In [None]:
import requests
from typing import Dict, Any

class CoinomonClient:
    """Small client for the Coinomon JWT REST endpoints used in this notebook."""

    def __init__(self, token: str) -> None:
        """Store the bearer token header, the currency code and the service base URL."""
        self.crypto = "BTC"
        self.headers = {"Authorization": f"Bearer {token}"}
        self.base_url = "https://coinomon.bazar.nesad.fit.vutbr.cz/"

    def _get_json(self, path: str) -> Dict[str, Any]:
        """GET ``{base_url}jwt/v1/{crypto}/{path}`` and return the decoded JSON body.

        On an HTTP status of 400 or above the response text is printed and an
        empty dict is returned.
        """
        response = requests.get(f"{self.base_url}jwt/v1/{self.crypto}/{path}", headers=self.headers)
        if response.status_code >= 400:
            print(str(response.text))
            return {}

        return response.json()

    def get_address_info(self, address: str) -> Dict[str, Any]:
        """Return the summary for ``address`` (``{}`` on HTTP error)."""
        return self._get_json(f"cryptoaddress/{address}/summary")

    def get_cluster_info(self, cluster_id: str) -> Dict[str, Any]:
        """Return the cluster record for ``cluster_id`` (``{}`` on HTTP error).

        The request URL now includes ``base_url``; the previous bare
        ``/jwt/v1/...`` path was not a valid URL for ``requests``.
        """
        return self._get_json(f"cryptocluster/{cluster_id}")

    def get_cluster_addresses(self, cluster_id: str) -> Dict[str, Any]:
        """Return the addresses of cluster ``cluster_id`` (``{}`` on HTTP error).

        The request URL now includes ``base_url``, as in ``get_cluster_info``.
        """
        return self._get_json(f"cryptocluster/{cluster_id}/addresses")
    
# Placeholder value: replace with a real Coinomon token before running.
# NOTE(review): avoid committing a real token; consider reading it from the environment instead.
token = "insert here"

coinomon_client = CoinomonClient(token)

In [39]:
# Three-hop consolidation search. Only the Wasabi 2 run over the whole chain is active;
# the other variants (date-limited `res`, `res_w1`, `res_wh`) are commented out.
# start, end = get_block_height_range('2023-02-01', '2023-02-28')
# %time res = chain.find_consolidation_3_hops(wasabi2_events, start, end)
# %time res_w1 = chain.find_consolidation_3_hops(wasabi_events, 0, len(chain))
%time res_w2 = chain.find_consolidation_3_hops(wasabi2_events, 0, len(chain))
# %time res_wh = chain.find_consolidation_3_hops(whirlpool_events, 0, len(chain))

# %time res = chain.find_consolidation_3_hops(whirlpool_events, 774513, 778584)

CPU times: user 7min 24s, sys: 5min, total: 12min 24s
Wall time: 5.04 s


In [1]:
import json

# NOTE(review): `res` is only assigned by commented-out lines in the preceding cell (the
# active line assigns `res_w2`); define `res` before running this cell.

# (inner key, outer tx, value) of the entry with the largest value across `res`.
max_outputs = "", "", 0

for tx, outputs in res:
    for out, val in outputs.items():
        if val > max_outputs[2]:
            max_outputs = out, tx, val

print(len(res))
print(max_outputs)


def print_coinomon_summary(tx_hash):
    """Print Coinomon's summary for the first output address of ``tx_hash``; return the tx."""
    looked_up = chain.tx_with_hash(tx_hash)
    coinomon_data = coinomon_client.get_address_info(looked_up.outputs[0].address.address_string)
    # get_address_info returns {} on HTTP errors, so "data" and its keys may be missing.
    details = coinomon_data.get("data", {})
    details.pop("firstTx", None)
    details.pop("lastTx", None)
    print(json.dumps(coinomon_data, indent=4))
    return looked_up


# Nothing to look up when no entry beat the initial ("", "", 0) placeholder.
if max_outputs[2] > 0:
    tx = print_coinomon_summary(max_outputs[0])
    tx_start = print_coinomon_summary(max_outputs[1])




NameError: name 'res' is not defined

In [None]:
# Print the first Coinomon record (for a tx's first output address) that carries alarms.
for tx_hash, _ in res:
    tx = chain.tx_with_hash(tx_hash)
    coinomon_data = coinomon_client.get_address_info(tx.outputs[0].address.address_string)
    # get_address_info returns {} on HTTP errors; skip such responses instead of raising KeyError.
    if coinomon_data.get("data", {}).get("alarms"):
        print(coinomon_data)
        break

# Analysis over the coinjoins (friends do not pay)
**(petrs request)**

- export all transactions `X`:
    - X is 2 hops away from a ww2 coinjoin
    - all inputs to X are from a WW2 coinjoin
- output the same as `wasabi2_events.json`
Structure:
- txid
    - txid
    - block_index
    - broadcast_time
    - inputs
        - input number
            - value
            - wallet_name
            - mix_event_type
    - outputs
        - output number
            - value
            - wallet_name
            - mix_event_type
    - num_inputs
    - num_outputs
    
# !!!!!!! OUTPUT IS SPENT IN SPENDING TX !!!!!!!
# !!!!!!! INPUT WAS SPENT IN SPENT TX !!!!!!!

In [2]:
# Spot-check a single transaction.
# NOTE(review): the attribute is spelled `is_wasabi1_conjoin` here, while other cells use
# `is_whirlpool_coinjoin`; confirm which spelling the API exposes.
tx = chain.tx_with_hash("253d7fce540ec3cfa3acc34704d0e23719beb4e3b9b577113b55f179a0f44208")
print(tx.is_wasabi1_conjoin)
# for o in tx.outputs:
#     print(o.address.address_string == 'bc1qs604c7jv6amk4cxqlnvuxv26hv3e48cds4m0ew')


True
WW1
253d7fce540ec3cfa3acc34704d0e23719beb4e3b9b577113b55f179a0f44208
1
67


In [None]:
from typing import Dict, List


# outputs list of transactions where the condition holds
def find_friends_do_not_pay_txes(tx: blocksci.Tx, **kwargs) -> List[str]:
    """Collect transactions two hops after ``tx`` whose inputs all come from ``ww2_events``.

    Keyword args:
        ww2_events: container keyed by transaction-hash strings.

    Returns:
        Hash strings of the matching transactions. A transaction appears once
        per path that reaches it, so duplicates are possible.
    """
    ww2_events = kwargs["ww2_events"]
    result = []
    for out1 in tx.outputs:
        if not out1.is_spent:
            continue

        for out2 in out1.spending_tx.outputs:
            if not out2.is_spent:
                continue

            curr = out2.spending_tx
            # Compare the stringified hash, as the input check below does; the
            # raw hash object was compared against string keys before.
            # NOTE(review): this requires the candidate itself to be in
            # `ww2_events`; confirm that is the intended condition.
            if str(curr.hash) not in ww2_events:
                continue

            if all(str(inp.spent_tx.hash) in ww2_events for inp in curr.inputs):
                result.append(str(curr.hash))

    # The original fell off the end and returned None despite the List[str] annotation.
    return result
            
            
            

# Run find_friends_do_not_pay_txes over every txid in the Wasabi 2 events (64 workers).
# NOTE(review): `friends` is reassigned by the next cell (chain.find_friends_who_dont_pay).
%time friends = chain.map_spliterator(map_func=find_friends_do_not_pay_txes, keys=list(wasabi2_events.keys()), data_directory=str(parser_data_directory),ww2_events=wasabi2_events,workers=64)

In [None]:
# NOTE(review): `start` / `stop` are computed for January 2023 but the call below passes
# start=0 and stop=len(chain) instead; confirm which range is intended.
start, stop = get_block_height_range('2023-01-01', '2023-02-01')
%time friends = chain.find_friends_who_dont_pay(keys=wasabi2_events, start=0, stop=len(chain))

In [None]:
from typing import Dict, Any, List

def process_inputs(tx: blocksci.Tx) -> List[Dict[str, Any]]:
    """Describe every input of ``tx`` as a one-entry dict keyed by the input index.

    Each entry holds the input's value, its address (under ``"wallet_name"``)
    and whether the transaction it spends from is listed in ``wasabi2_events``.
    """
    return [
        {
            str(inp.index): {
                "value": inp.value,
                "wallet_name": inp.address,
                "is_ww2_coinjoin": str(inp.spent_tx.hash) in wasabi2_events,
            }
        }
        for inp in tx.inputs
    ]

counter = 0

def process_outputs(tx: blocksci.Tx) -> List[Dict[str, Any]]:
    """Describe every output of ``tx`` as a one-entry dict keyed by the output index.

    Each entry holds the output's value, its address (under ``"wallet_name"``)
    and whether it is spent by a transaction listed in ``wasabi2_events``.
    """
    return [
        {
            str(out.index): {
                "value": out.value,
                "wallet_name": out.address,
                "is_ww2_coinjoin": out.is_spent and str(out.spending_tx.hash) in wasabi2_events,
            }
        }
        for out in tx.outputs
    ]


def fill_json_info(tx: blocksci.Tx) -> Dict[str, Any]:
    """Assemble the per-transaction record: ids, counts and input/output descriptions.

    NOTE(review): ``broadcast_time`` holds ``tx.block_time`` as-is; other cells call
    ``.isoformat()`` on it before JSON export. Confirm before dumping this record.
    """
    info: Dict[str, Any] = {}
    info["txid"] = str(tx.hash)
    info["block_index"] = str(tx.block_height)
    info["broadcast_time"] = tx.block_time
    info["num_inputs"] = tx.input_count
    info["num_outputs"] = tx.output_count
    info["inputs"] = process_inputs(tx)
    info["outputs"] = process_outputs(tx)
    return info

# One record per transaction id in `friends`.
result = {
    tx_id: fill_json_info(chain.tx_with_hash(tx_id))
    for tx_id in friends
}

print(len(result))


In [3]:
# Flows between each ordered pair of coinjoin implementations over the whole chain.
# Variable names read <source>_<destination>: w1 = `wasabi_events`, w2 = `wasabi2_events`,
# wp = `whirlpool_events`.
strict = False
%time w2_wp = chain.find_traverses_in_coinjoin_flows(0, len(chain), wasabi2_events, whirlpool_events, strict)
%time wp_w2 = chain.find_traverses_in_coinjoin_flows(0, len(chain), whirlpool_events, wasabi2_events, strict)
%time w1_wp = chain.find_traverses_in_coinjoin_flows(0, len(chain), wasabi_events, whirlpool_events, strict)
%time wp_w1 = chain.find_traverses_in_coinjoin_flows(0, len(chain), whirlpool_events, wasabi_events, strict)
%time w2_w1 = chain.find_traverses_in_coinjoin_flows(0, len(chain), wasabi2_events, wasabi_events, strict)
%time w1_w2 = chain.find_traverses_in_coinjoin_flows(0, len(chain), wasabi_events, wasabi2_events, strict)

CPU times: user 47min 49s, sys: 1h 39min 23s, total: 2h 27min 13s
Wall time: 43.7 s
CPU times: user 1h 14min 31s, sys: 5.2 s, total: 1h 14min 36s
Wall time: 25.6 s
CPU times: user 1h 1min 55s, sys: 1.44 s, total: 1h 1min 56s
Wall time: 25.5 s
CPU times: user 1h 15min 17s, sys: 3.7 s, total: 1h 15min 21s
Wall time: 27.6 s
CPU times: user 52min 57s, sys: 3.6 s, total: 53min
Wall time: 17.7 s
CPU times: user 53min 22s, sys: 3.95 s, total: 53min 26s
Wall time: 18.2 s


In [4]:
labels = ["Wasabi2 -> Whirlpool", "Whirlpool -> Wasabi2", "Wasabi1 -> Whirlpool", "Whirlpool -> Wasabi1", "Wasabi2 -> Wasabi1", "Wasabi -> Wasabi2"]
results = [w2_wp, wp_w2, w1_wp, wp_w1, w2_w1, w1_w2]


def is_case_1(x):
    """Return True when the fifth field of a flow row (its pair list) is empty."""
    return len(x[4]) == 0


# Running total of case-1 rows over all flow directions.
sums = 0

for label, result in zip(labels, results):
    # Split once per direction instead of re-filtering for every statistic.
    case_1_rows = [row for row in result if is_case_1(row)]
    case_2_rows = [row for row in result if not is_case_1(row)]

    print(label, f"Total length: {len(result)}, case 1: {len(case_1_rows)}, case 2: {len(case_2_rows)}")
    satoshis_case1 = sum(row[3] for row in case_1_rows)
    satoshis_case2 = sum(row[3] for row in case_2_rows)
    print(label)
    print(f"\tCASE 1 HOP: total: {satoshis_case1} satoshis / {round(satoshis_case1 / 100000000, 2)} BTC")
    print(f"\tCASE 2 HOPS: total: {satoshis_case2} satoshis / {round(satoshis_case2 / 100000000, 2)} BTC")

    sums += len(case_1_rows)

print(sums)

Wasabi2 -> Whirlpool Total length: 1194, case 1: 42, case 2: 1152
Wasabi2 -> Whirlpool
	CASE 1 HOP: total: 2834782749 satoshis / 28.35 BTC
	CASE 2 HOPS: total: 51379000302 satoshis / 513.79 BTC
Whirlpool -> Wasabi2 Total length: 3198, case 1: 1549, case 2: 1649
Whirlpool -> Wasabi2
	CASE 1 HOP: total: 22507026905 satoshis / 225.07 BTC
	CASE 2 HOPS: total: 9515463713 satoshis / 95.15 BTC
Wasabi1 -> Whirlpool Total length: 3183, case 1: 28, case 2: 3155
Wasabi1 -> Whirlpool
	CASE 1 HOP: total: 265722659 satoshis / 2.66 BTC
	CASE 2 HOPS: total: 95272290375 satoshis / 952.72 BTC
Whirlpool -> Wasabi1 Total length: 1660, case 1: 608, case 2: 1052
Whirlpool -> Wasabi1
	CASE 1 HOP: total: 8938035036 satoshis / 89.38 BTC
	CASE 2 HOPS: total: 10341313441 satoshis / 103.41 BTC
Wasabi2 -> Wasabi1 Total length: 653, case 1: 394, case 2: 259
Wasabi2 -> Wasabi1
	CASE 1 HOP: total: 4108480212 satoshis / 41.08 BTC
	CASE 2 HOPS: total: 4218390033 satoshis / 42.18 BTC
Wasabi -> Wasabi2 Total length: 7212

In [6]:
output_json = []

# Per flow direction: first field of every row whose pair list is empty.
label_result_mapping = {
    flow_label: {row[0] for row in rows if len(row[4]) == 0}
    for flow_label, rows in zip(labels, results)
}


def describe_coinjoins(cjs):
    """Map each coinjoin txid to its value and the ISO block time of that transaction."""
    return {
        cj_txid: {
            'value': cj_value,
            'broadcast_time': chain.tx_with_hash(cj_txid).block_time.isoformat(),
        }
        for cj_txid, cj_value in cjs.items()
    }


for label, rows in zip(labels, results):
    for txid, in_cjs, out_cjs, sats_moved, pairs in rows:
        # Rows with hop pairs are exported only for the "-> Whirlpool" directions.
        if len(pairs) != 0 and '-> Whirlpool' not in label:
            continue

        tx = chain.tx_with_hash(txid)

        output_json.append({
            "txid": str(tx.hash),
            "broadcast_time": tx.block_time.isoformat(),
            "in_cjs": describe_coinjoins(in_cjs),
            "out_cjs": describe_coinjoins(out_cjs),
            "flow_direction": label,
            "sats_moved": sats_moved,
            "hop_tx_cj_pairs": dict(pairs),
        })
    # Running size of the export after each flow direction.
    print(len(output_json))


with open('/mnt/dumplings/one_hop_flows.json', 'w') as f:
    json.dump(output_json, f)

1194
2743
5926
6534
6928
9789


In [83]:
with open("/mnt/dumplings/mix_flows.json", "r") as f:
    mixed = json.load(f)


print(mixed['Wasabi2 -> Wasabi1']['7e2083774fffa386464b5c35184337d6e5e80c8f5022248a63c31a9fe2584ea0'])
for flow_label, txids in label_result_mapping.items():
    print(flow_label, len(txids))

# First field of every row computed for Whirlpool -> Wasabi2.
curr = {row[0] for row in wp_w2}

# Count the entries of the file that are missing from the computed set, and print those
# that are also absent from `wasabi2_events`.
cnt = 0
for txid in mixed['Whirlpool -> Wasabi2']:
    if txid in curr:
        continue
    cnt += 1
    if txid not in wasabi2_events:
        print(txid)
        print(mixed['Whirlpool -> Wasabi2'][txid])

print(cnt)

{'broadcast_time': '2022-10-20 05:27:23.000', 'value': 9920232}
Wasabi2 -> Whirlpool 42
Whirlpool -> Wasabi2 1549
Wasabi1 -> Whirlpool 28
Whirlpool -> Wasabi1 608
Wasabi2 -> Wasabi1 394
Wasabi -> Wasabi2 2861
4ac7e9e2e58a220f866c265dd391137d83e35695ba052f8c5fc1307c0811d13a
{'broadcast_time': '2023-03-06 01:07:19.000', 'value': 100000}
7516d0146d6ca2bdb1dea717d25828537e73b949151338e295cd340b59e91a16
{'broadcast_time': '2023-03-03 07:35:06.000', 'value': 1000000}
e85266be2372837873e2141e0db363a047c485e6043934c5662aaed3b6211bfd
{'broadcast_time': '2023-04-03 23:47:12.000', 'value': 50000000}
135


# Real consolidation analysis

### Consolidation types
1. only 1 output -> certain consolidation
2. 1-5 big outputs, more small outputs
3. outputs that in dollars make a nice round number


### TODO
- [x] first get all cjs and their certain 1-hop consolidations
- [x] then make it so we can go more hops (while loops)
- [x] then experiment with other consolidation types and thresholds
- [ ] profit

In [29]:
# Consolidation search over the whole chain for the "wp" coinjoin type.
# NOTE(review): the meaning of the positional arguments 5 and 0 is not visible here;
# presumably a hop limit and a consolidation-type/threshold selector. Confirm against the API.
%time r = chain.get_coinjoin_consolidations(0, len(chain), 5, "wp", 0)

CPU times: user 1min 58s, sys: 1.93 s, total: 2min
Wall time: 4.77 s


In [3]:
# Total input value over every "certain" entry of every pair in `r`, in BTC.
# Entries listed under several pairs are counted once per pair.
certain_total_sats = sum(
    consolidation.input_value
    for _, groups in r
    for consolidation in groups["certain"]
)
print(certain_total_sats / 100000000)
# print(50968643318271794 / 100000000)

2806776.944629


In [4]:
# Same computation as the previous cell: total input value of all "certain" entries, in BTC.
print(sum(sum(x.input_value for x in y["certain"]) for _, y in r) / 100000000)

2806776.944629


In [30]:
# De-duplicate across pairs before summing.
certain = {consolidation for _, groups in r for consolidation in groups["certain"]}
possible = {consolidation for _, groups in r for consolidation in groups["possible"]}

certain_btc = sum(consolidation.input_value for consolidation in certain) / 100000000
possible_btc = sum(consolidation.input_value for consolidation in possible) / 100000000
print(certain_btc, possible_btc)

5339.596 3493.718


In [31]:
import json

# Deduplicated "certain" consolidations, largest total input value first.
c = sorted(certain, key=lambda tx: -tx.input_value)
print(len(c))

# Upper bound on how many consolidations are exported. Slicing (rather than
# indexing c[0] .. c[999]) keeps the cell working when fewer exist.
max_exported = 1000

final_output = []

tx: blocksci.Tx
for tx in c[:max_exported]:
    out = {
        "hash": str(tx.hash),
        "mempool_link": tx.mempool_space_link,
        "block": tx.block_height,
        "time": tx.block_time.isoformat(),
        # NOTE(review): hard-coded; presumably these are all direct (1-hop)
        # consolidations -- confirm against how `r` was computed.
        "hops": 1,
        "value": tx.input_value,
        # Transactions spent by this consolidation's inputs that are flagged
        # as Whirlpool coinjoins.
        "consolidations": [
            {
                "hash": str(x.spent_tx.hash),
                "mempool_link": x.spent_tx.mempool_space_link,
                "type": "wp",
                "block": x.spent_tx.block_height,
                "time": x.spent_tx.block_time.isoformat(),
            }
            for x in tx.inputs if x.spent_tx.is_whirlpool_coinjoin
        ],
    }

    final_output.append(out)

# Persist the export as pretty-printed JSON.
with open("/mnt/anal/certain_consolidations_wp.json", "w") as f:
    json.dump(final_output, f, indent=2)


18690


In [6]:
# <tx, <c1, c2, c3...>>

# For every (tx, groups) pair in `r`, precompute per group ("certain" and
# "possible") the total input value and the set of its transactions.
mapped_consolidations = [
    (
        coinjoin_tx,
        {
            kind: {
                "sum": sum(x.input_value for x in groups[kind]),
                "consolidations": set(groups[kind]),
            }
            for kind in ("certain", "possible")
        },
    )
    for coinjoin_tx, groups in r
]

# Two views of the same data, each ordered by descending group total.
sorted_certain_consolidations = sorted(
    mapped_consolidations,
    key=lambda entry: entry[1]["certain"]["sum"],
    reverse=True,
)
sorted_possible_consolidations = sorted(
    mapped_consolidations,
    key=lambda entry: entry[1]["possible"]["sum"],
    reverse=True,
)

In [None]:
from typing import Any, Dict, List  # List is kept in case later cells rely on this import

# How many of the top entries (by total "certain" value) to print.
top_n = 1

coinjoin_tx: blocksci.Tx
# Each entry's second element is a dict keyed by "certain"/"possible", holding
# {"sum": <int>, "consolidations": <set of transactions>} -- not a list.
cons: Dict[str, Dict[str, Any]]

# Slicing instead of indexing avoids an IndexError when the list is empty.
for coinjoin_tx, cons in sorted_certain_consolidations[:top_n]:
    print('--' * 20)
    print(coinjoin_tx.mempool_space_link)
    print(cons["certain"]["sum"] / 100000000)
    # Use a separate loop variable so the outer transaction is not overwritten.
    for consolidation_tx in sorted(cons["certain"]["consolidations"], key=lambda x: -x.input_value):
        print(consolidation_tx.mempool_space_link)
        print(consolidation_tx.input_value / 100000000)
        print(consolidation_tx.input_count, consolidation_tx.output_count)

    print('--' * 20)

# I can't anymore

Here I try to do the clustering again

In [4]:
import random

# Fixed sample transaction, looked up by hash, used by the cells below as a
# starting point for inspecting the clustering.
one_random_wp_cj = chain.tx_with_hash("9ea3e5d217c76e6be6ace7fe4fb3622976103466de81d2b02cd87c54f9e91a2f")

# Alternative kept for reference: construct a ClusterManager from
# cluster_directory / "fr" instead of calling create_coinjoin_clustering.
# clusterer = blocksci.cluster.ClusterManager(str(cluster_directory / "fr"), chain)


# Build the coinjoin clustering and time it.
# NOTE(review): 0 and len(chain) presumably bound the block range, and
# overwrite=True presumably replaces any clustering already stored at the
# given path -- confirm against create_coinjoin_clustering before re-running.
%time clusterer = blocksci.cluster.ClusterManager.create_coinjoin_clustering(chain, 0, len(chain), "/mnt/anal/fr/ww2_2/", overwrite=True, coinjoin_type="whirlpool")


CPU times: user 14min 33s, sys: 1min 47s, total: 16min 21s
Wall time: 2min 23s


In [5]:
# Follow the second-to-last output of the sample coinjoin to the transaction
# that spends it, then look up the cluster of that transaction's first output.
spending_tx = one_random_wp_cj.outputs[-2].spending_tx
consolidation: blocksci.cluster.Cluster = clusterer.cluster_with_address(
    spending_tx.outputs[0].address
)

print(consolidation.address_count())

# One address per line.
for addr in consolidation.addresses.to_list():
    print(addr)


# Total number of clusters in the clustering.
print(len(clusterer.clusters()))

print("AA")

21
bc1qqp0346cszv9wymgpns9canqttuazmhgz4r67gr
bc1qtwen0flmynfh324t64cs42dsslhrd72xpvc3re
bc1qvsnfprcnv09lmyv3dljjflc0kjur2uaasy5c58
bc1qkq8v6x35ywfwf8slr6j6y4czqqag0lz3fac3kv
bc1qxzat6pfh95g85zpysuxw52nutnw6nn6g96vlag
bc1q7v9dtunx3f8jd66uqz6vrcp9uq3wyfd0hnyr9j
bc1q66zzn0fc2wwhr4nuwdzlcl5eslw8qpufeqcyd9
bc1qhlfq4zk0h2yukr344q5l3kg3f2yml9jw7kt030
bc1qeweamqqafrd5ku9c0clehucg45sut4gjdzzvrs
bc1q4wnn6u3ufn4cxttxpl9j8qz3m652pztrnjht4e
bc1qx3hyqdml6zq9m0qs93t6j3w7cu2mmtyget0l53
bc1qs82fe6dt3cd5q3txqca0u8kvvkzzxm4pmptjus
bc1qk60hljcejrn958rkththg362q5spnxh8r7zjjs
bc1qnmgppypp0nmc9ag6gt4argr0pwwuw8838n6z0e
bc1qarlkgd2xhxyjngeu74gu6cg5vxvqxp2gdmcrcw
bc1qcvd57u60tkl4x9an4629qruetukx3eupxw55y7
bc1qg9xepp0dz0rhmqm6vz27zq0ykh43vt02ej40yh
bc1qgcnusx0c37t0rxvsrycl8r3u5t4e8lhurxxghs
bc1q36whjkxq68gw6dqwqcrv9xgfcrd5nacxk7unsv
bc1qrurh38dysxulstemmq3hvn59de8mhc9846mfee
bc1q6fdqf6mhk5a7weuf9gxss63nsvhchr24xa904u
2350697238
AA


In [4]:
# Show the block that contains the sample coinjoin transaction.
print(one_random_wp_cj.block)
# NOTE(review): the number below is an unexplained leftover; presumably a
# cluster count from another run -- confirm or remove.
# 2350869805

Block(tx_count=3061, height=839164, header_hash=0000000000000000000128c51f1198597b06af19bdd633849d3af2adf7509fe2, version=629710848, timestamp=1713089976, bits=386089497, nonce=3992381634)
