# Simplify transaction

### This is a pre-processing procedure that is applied to every transaction. Simplification means checking whether the same address appears more than once among the inputs and outputs, and merging its entries into a single input or output (depending on whether more BTC flows in or out for that address).

The definition of simplification can be found in the paper "Shared Send Untangling in Bitcoin": https://bitfury.com/content/downloads/bitfury_whitepaper_shared_send_untangling_in_bitcoin_8_24_2016.pdf

In [1]:
def simplify(tx):
    """Return a simplified copy of ``tx`` with each address appearing once.

    ``tx`` is a dict with ``'inputs'`` and ``'outputs'`` keys, each a list of
    dicts carrying at least ``'address'`` and ``'value'``.

    Rules applied per address:

    * It appears exactly once in the whole transaction: its original entry
      (the same dict object, not a copy) is kept on its original side.
    * It appears several times but only among the inputs (or only among the
      outputs): its entries are merged into one entry holding the sum.
    * It appears on both sides: the entries are netted into a single output
      if more value flows to the address than from it, otherwise into a
      single input. An exact tie is kept as an input with value 0.

    Entries for addresses that appear once come first, in their original
    order; merged/netted entries follow, ordered by the first appearance of
    the address (inputs scanned before outputs).

    The input ``tx`` itself is not modified.
    """
    inputs = tx['inputs']
    outputs = tx['outputs']

    # One pass per side: count how often each address occurs in the whole
    # transaction and accumulate its per-side totals. ``occurrences`` doubles
    # as the first-appearance ordering, since dicts keep insertion order.
    occurrences = {}
    input_totals = {}
    output_totals = {}
    for entry in inputs:
        address = entry['address']
        occurrences[address] = occurrences.get(address, 0) + 1
        input_totals[address] = input_totals.get(address, 0) + entry['value']
    for entry in outputs:
        address = entry['address']
        occurrences[address] = occurrences.get(address, 0) + 1
        output_totals[address] = output_totals.get(address, 0) + entry['value']

    # Addresses seen exactly once need no merging: keep their entries as-is.
    new_tx = {
        'inputs': [e for e in inputs if occurrences[e['address']] == 1],
        'outputs': [e for e in outputs if occurrences[e['address']] == 1],
    }

    for address, count in occurrences.items():
        if count == 1:
            continue

        if address not in output_totals:
            # Repeated among the inputs only: merge into one input.
            # (Emitted exactly once; previously such addresses were
            # appended twice.)
            side, value = 'inputs', input_totals[address]
        elif address not in input_totals:
            # Repeated among the outputs only: merge into one output.
            side, value = 'outputs', output_totals[address]
        else:
            # Present on both sides: keep only the net flow.
            spent = input_totals[address]
            received = output_totals[address]
            if received > spent:
                side, value = 'outputs', received - spent
            else:
                side, value = 'inputs', spent - received

        new_tx[side].append({'address': address, 'value': value})

    return new_tx

Here is how it is supposed to work on a transaction:

In [2]:
# Example transaction: addresses 'a' and 'b' appear on both sides, while
# 'c' only spends and 'd' only receives.
t = {
    'inputs': [
        {'address': 'a', 'value': 5},
        {'address': 'b', 'value': 5},
        {'address': 'c', 'value': 10.1},
    ],
    'outputs': [
        {'address': 'a', 'value': 4},
        {'address': 'b', 'value': 6},
        {'address': 'd', 'value': 10},
    ],
}

In [3]:
# 'a' spends 5 and receives 4, so it is netted to an input of 1; 'b' spends 5
# and receives 6, so it is netted to an output of 1. 'c' and 'd' appear once
# and pass through unchanged.
simplify(t)

{'inputs': [{'address': 'c', 'value': 10.1}, {'address': 'a', 'value': 1}],
 'outputs': [{'address': 'd', 'value': 10}, {'address': 'b', 'value': 1}]}