# Predict change of 2-out transactions where the change has not been revealed yet

In [1]:
import blocksci

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [3]:
import collections
import random

In [4]:
import utils

In [5]:
import analysis

In [6]:
# Open the blockchain with the BlockSci configuration returned by the local `utils` helper.
chain = blocksci.Blockchain(utils.blocksci_config())

In [7]:
# Cluster manager over `chain`, loaded from whatever `utils.latest_clustering()` returns
# (presumably the path of the most recent clustering -- confirm in utils).
my_cm = blocksci.cluster.ClusterManager(utils.latest_clustering(), chain)

### Select and store relevant transactions

We store the indexes of the relevant transactions so that we do not need to re-run the selection filters every time we iterate over the transactions (which we do for every heuristic).

In [8]:
# Change heuristic parameterised with our cluster manager; `relevant_txes` below keeps
# only transactions for which it returns an empty result.
cluster_member = blocksci.heuristics.change.cluster_member(my_cm)

In [9]:
# Short alias for the address-reuse change heuristic; `relevant_txes` below keeps only
# transactions for which it returns an empty result.
address_reuse = blocksci.heuristics.change.address_reuse

In [10]:
def relevant_txes():
    """Return the transactions of ``chain`` that this notebook analyses.

    The filters are applied one after another, in this order:

    1. the transaction is not a coinbase transaction,
    2. it has exactly two outputs,
    3. none of its outputs has the ``nulldata`` address type,
    4. the ``address_reuse`` heuristic returns nothing for it,
    5. the ``cluster_member`` heuristic returns nothing for it.
    """
    # Each predicate is handed to `.where()` in turn, so the order matters.
    predicates = (
        lambda tx: ~tx.is_coinbase,
        lambda tx: tx.output_count == 2,
        lambda tx: ~tx.outputs.any(lambda o: o.address_type == blocksci.address_type.nulldata),
        lambda tx: address_reuse(tx).size == 0,
        lambda tx: cluster_member(tx).size == 0,
    )

    selected = chain.blocks.txes
    for keep in predicates:
        selected = selected.where(keep)
    return selected

In [11]:
# Evaluate the selection once and keep only the transaction indexes.
# Expensive: the recorded run below took about 18.5 minutes of wall time.
%time tx_indexes = relevant_txes().index

CPU times: user 16min 21s, sys: 36.2 s, total: 16min 57s
Wall time: 18min 29s


In [12]:
# Number of transactions that passed all filters.
len(tx_indexes)

309653054

In [13]:
# Create a GroundTruth store for the selected transaction indexes at the given path.
# NOTE(review): the absolute path is machine-specific; adjust it before re-running elsewhere.
store_txes = blocksci.GroundTruth.create(chain, tx_indexes, "/home/ubuntu/Data/remaining/20210719-two-out")

In [14]:
# Sanity check: the store exposes exactly as many transactions as we selected.
assert store_txes.transactions().size == len(tx_indexes)

In [15]:
# Read the indexes back from the store so they can be compared with the originals.
stored_indexes = store_txes.transactions().index

In [16]:
# Sanity check: the stored indexes match the selected ones element by element, in order.
assert (tx_indexes == stored_indexes).all()

In [17]:
# Both index collections (about 310 million entries each) are no longer needed
# once the checks have passed; drop the references.
del tx_indexes, stored_indexes

### Export mask for whether all outputs are spent

In [19]:
# One boolean per stored transaction, in store order: True when every output of
# the transaction has been spent.
all_spent = store_txes.transactions().map(lambda tx: tx.outputs.all(lambda o: o.is_spent))

In [20]:
# Should equal the number of stored transactions (one mask entry per transaction).
len(all_spent)

309653054

In [21]:
# Write the mask to disk in NumPy .npy format; entries follow the order of
# `store_txes.transactions()`.
# NOTE(review): the absolute path is machine-specific; adjust it before re-running elsewhere.
np.save("/home/ubuntu/Data/remaining/20210719_all_spent_mask.npy", all_spent)