# INSTALL: packages

In [1]:
pip install ipynb

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\info\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


In [2]:
pip install graphviz

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\info\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


In [3]:
import os
import pickle
from WalletClustering_MIH import iterMultiInputClustering_chunks
from graphviz import Digraph
import pandas as pd

# EXECUTE: WalletClustering_AddressIdentification notebook

In [4]:
%run WalletClustering_AddressIdentification.ipynb # also includes running WalletClustering_neo4jConnect notebook
# methods & variables of notebook can be referenced

In [None]:
# Load the pickled blacklist of associations into `assocBlacklist`.
# A context manager is used so the file handle is closed right after loading.
# NOTE(review): pickle.load can execute arbitrary code; only load files that
# were produced by this project.
with open('output\\assocBlacklist.pickle', 'rb') as blacklistFile:
    assocBlacklist = pickle.load(blacklistFile)

# DEFINE: Make graph of flows

## check for blacklisted association

In [None]:
# check if association is part of blacklisted associations
def assocBlacklistCheck(association, blacklistToCheckAgainst=None):
    """Return True if ``association`` is contained in the given blacklist.

    Parameters
    ----------
    association
        The association identifier to look up.
    blacklistToCheckAgainst
        Container of blacklisted associations. When omitted (``None``) the
        module-level ``assocBlacklist`` is used, resolved at call time so a
        re-loaded blacklist is picked up without re-defining this function.

    Returns
    -------
    bool
        True when ``association`` is in the blacklist, False otherwise.
    """
    # Compare against None explicitly so an explicitly passed empty blacklist
    # is honoured instead of being replaced by the global one.
    if blacklistToCheckAgainst is None:
        blacklistToCheckAgainst = assocBlacklist
    return association in blacklistToCheckAgainst

## query for input address association

In [None]:
def getAssoc(address, blacklistCheck = 'on'):
    """Return the association stored for ``address``.

    The association is read from the ``a.association`` property of the
    matching ``Address`` node. If that property is unset (None), the
    multi-input heuristic ``iterMultiInputClustering_chunks`` is run on the
    address and the association it returns is used instead.

    Parameters
    ----------
    address
        Address string to look up.
    blacklistCheck
        ``'on'`` (default) raises if the stored association is blacklisted;
        any other value skips the blacklist check.

    Returns
    -------
    The association of the address.

    Raises
    ------
    IndexError
        If the query returns no rows for ``address``.
    Exception
        With message ``'Blacklist'`` if ``blacklistCheck == 'on'`` and the
        stored association is blacklisted.
    """
    assocTemplate = '''
    MATCH (a:Address {address: "%s"})
    RETURN a.association
    '''

    # NOTE(review): the address is interpolated straight into the Cypher text.
    # If conn.query supports query parameters, prefer them for untrusted input.
    records = conn.query(assocTemplate % address, db='neo4j')

    if not records:
        # Previously this surfaced as a bare IndexError from records[0].
        raise IndexError('No association record returned for address: %s' % address)

    association = records[0][0]

    if association is None:
        # run MI heuristic on address; should also flag new association in neo4J
        # NOTE(review): the association produced here is not blacklist-checked,
        # matching the previous behaviour -- confirm whether that is intended.
        addresses, association = iterMultiInputClustering_chunks(address)
        return association

    # Check the association value itself; the raw query result can never be
    # an element of the blacklist, so checking it would always pass.
    if blacklistCheck == 'on' and assocBlacklistCheck(association, assocBlacklist):
        # throw exception in case association is blacklisted
        raise Exception('Blacklist')

    return association

## gather input and output addresses to association

In [None]:
def get_Inputs(query_Input):
    """Run ``query_Input`` on the 'neo4j' database and tabulate the result.

    Each returned record becomes one row with the columns
    ``Input_Addresses``, ``Value_Received_from_Inp``, ``Value_Send_from_Inp``,
    ``Transaction_ID`` and ``Association``, taken from record fields 0-4.
    Field 0 is expected to expose ``_properties["address"]``
    (presumably a neo4j node -- verify against the query).

    Returns a pandas DataFrame with one row per record.
    """
    records = conn.query(query_Input, db='neo4j')
    rows = [
        {
            'Input_Addresses': rec[0]._properties["address"],
            'Value_Received_from_Inp': rec[1],
            'Value_Send_from_Inp': rec[2],
            'Transaction_ID': rec[3],
            'Association': rec[4],
        }
        for rec in records
    ]
    return pd.DataFrame(rows)

In [None]:
def get_Outputs(query_Output):
    """Run ``query_Output`` on the 'neo4j' database and tabulate the result.

    Each returned record becomes one row with the columns
    ``Output_Addresses``, ``Value_send_to_Outp``, ``Value_rec_from_Outp``,
    ``Transaction_ID`` and ``Association``, taken from record fields 0-4.
    Field 0 is expected to expose ``_properties["address"]``
    (presumably a neo4j node -- verify against the query).

    Returns a pandas DataFrame with one row per record.
    """
    records = conn.query(query_Output, db='neo4j')
    rows = [
        {
            'Output_Addresses': rec[0]._properties["address"],
            'Value_send_to_Outp': rec[1],
            'Value_rec_from_Outp': rec[2],
            'Transaction_ID': rec[3],
            'Association': rec[4],
        }
        for rec in records
    ]
    return pd.DataFrame(rows)

In [None]:
# Collect every address (with its association) that receives from, or sends to,
# an address of the given association (user).
def getInputs_Outputs(association):
    """Query the counterparties of ``association`` in both directions.

    Returns a tuple ``(userOutput, userInput)`` of DataFrames:
    ``userOutput`` holds addresses receiving from transactions the
    association's addresses send to (built by ``get_Outputs``), and
    ``userInput`` holds addresses sending to transactions the association's
    addresses receive from (built by ``get_Inputs``).
    """
    outgoingTemplate = """
    Match (a:Address {association: "%s"})-[send:SENDS]->(t:Transaction)-[r:RECEIVES]->(out:Address)
    return out as Output_Addresses, send.value as Value_send_to_Outp,r.value as Value_rec_from_Outp,
     t.txid as Transaction_ID, out.association as Association"""

    incomingTemplate = """
    Match (input:Address)-[s:SENDS]->(t:Transaction)-[r:RECEIVES]->(a:Address {association: "%s"})
    return input as Input_Addresses, r.value as Value_Received_from_Inp,s.value as Value_Send_from_Inp,
    t.txid as Transaction_ID, input.association as Association"""

    # Outgoing side is queried first, then the incoming side.
    outgoingFrame = get_Outputs(outgoingTemplate % association)
    incomingFrame = get_Inputs(incomingTemplate % association)

    return outgoingFrame, incomingFrame

## query for input and output address associations

In [None]:
def _collectAssociations(frame, addressColumn):
    """Return the unique associations for the rows of ``frame``.

    Rows whose 'Association' is None are resolved through ``getAssoc`` using
    the address in ``addressColumn``.
    """
    assocs = []
    for _, row in frame.iterrows():
        if row['Association'] is not None:
            assocs.append(row['Association'])
        else:
            # Any value other than 'on' disables the blacklist check, so
            # blacklisted associations can still be shown as counterpart nodes.
            assocs.append(getAssoc(row[addressColumn], 'Blacklist_allowed'))
    return list(set(assocs))


def buildFlowGraph(queryInput, queryType):
    """Build a graph of flows into and out of one association.

    Parameters
    ----------
    queryInput
        An address (``queryType == 'Address'``) or an association
        (``queryType == 'Association'``).
    queryType
        ``'Address'`` or ``'Association'``.

    Returns
    -------
    graphviz.Digraph
        One node for the queried association, one node per receiving and per
        sending association, and an edge for each flow direction.

    Side effects
    ------------
    Prints progress information and writes ``flowGraph_nodes.pickle``,
    ``flowGraph_inEdges.pickle`` and ``flowGraph_outEdges.pickle`` into
    ``output\\``.

    Raises
    ------
    Exception
        If the queried address/association is blacklisted, or if
        ``queryType`` is neither ``'Address'`` nor ``'Association'``.
        Any other error raised while resolving the association propagates
        unchanged instead of being reported as a blacklist hit.
    """
    if queryType == 'Address':
        print('Queried address: '+queryInput)
        try:
            association = getAssoc(queryInput)
        except Exception as err:
            # Only the blacklist signal from getAssoc is translated; other
            # failures (database errors, missing address, ...) keep their
            # original type and message.
            if str(err) == 'Blacklist':
                raise Exception('Association of queried address is blacklisted.') from err
            raise
        print('Association of queried address: '+association)
    elif queryType == 'Association':
        print('Queried association: '+queryInput)
        if not assocBlacklistCheck(queryInput, assocBlacklist):
            association = queryInput
        else:
            raise Exception('Queried association is blacklisted.')
    else:
        raise Exception('Define query type "Address" or "Association".')

    nodes = []
    nodes.append(association)

    outputs, inputs = getInputs_Outputs(association)

    # drop duplicates to avoid running MIH multiple times on same address;
    # an empty result frame has no columns, so drop_duplicates would raise KeyError
    if not outputs.empty:
        outputs = outputs.drop_duplicates('Output_Addresses', keep='first')
    if not inputs.empty:
        inputs = inputs.drop_duplicates('Input_Addresses', keep='first')
    print(outputs)
    print(inputs)

    outputAssocs = _collectAssociations(outputs, 'Output_Addresses')
    print('Receiving associations of outgoing transactions:')
    print(outputAssocs)
    # appended as a nested list: nodes == [association, [outputs...], [inputs...]]
    nodes.append(outputAssocs)

    inputAssocs = _collectAssociations(inputs, 'Input_Addresses')
    print('Sending associations of incoming transactions:')
    print(inputAssocs)
    nodes.append(inputAssocs)

    # export nodes to file for dashboard
    with open('output\\flowGraph_nodes.pickle', 'wb') as export:
        pickle.dump(nodes, export)

    # NOTE(review): the edges are queried again rather than reusing the frames
    # above; presumably so associations newly flagged in neo4j while resolving
    # addresses are included -- TODO confirm, the query can be expensive.
    outputEdges, inputEdges = getInputs_Outputs(association)

    # export input edges to file for dashboard
    with open('output\\flowGraph_inEdges.pickle', 'wb') as export:
        pickle.dump(inputEdges, export)

    # export output edges to file for dashboard
    with open('output\\flowGraph_outEdges.pickle', 'wb') as export:
        pickle.dump(outputEdges, export)

    print(outputs)
    print(inputs)

    # build graph (adjust to use case)
    dot = Digraph()
    dot.node(association, association)

    for outNode in outputAssocs:
        dot.node(outNode, outNode)
        dot.edge(association, outNode)

    for inNode in inputAssocs:
        dot.node(inNode, inNode)
        dot.edge(inNode, association)

    return dot

# RUN: Make graph of flows

In [None]:
# Query configuration: set exactly one of `queriedAddress` / `queriedAssociation`
# and leave the other as None. Both names are always defined so the run cell
# can test them without raising NameError on a fresh kernel.

# queriedAddress = '12sDU3FyYJXc2oRzE6XXuuhVHCBJvaoCC8'
# queriedAssociation = 'yYiOdGN2a2TGR8GBwrMtk1CkMAFUcaan'

queriedAddress = '1DMcUNysqADX5hJ3naTsEM64GnSpe3nf7y'
# queriedAssociation = 'ze3ws3ckYylbZwpXj6xNRXSBeRkenIzY'
queriedAssociation = None

# queriedAddress = '37sSxTNWhMTN17zAXUzT1fu3sdM9qf1vhm'


In [None]:
# Pick the query target: an address takes precedence over an association.
if queriedAddress:
    flowQuery = (queriedAddress, 'Address')
elif queriedAssociation:
    flowQuery = (queriedAssociation, 'Association')
else:
    raise Exception('Please insert address or association to query.')

# Build the flow graph for the chosen target and open the rendered file.
graph = buildFlowGraph(*flowQuery)
graph.render(view=True);  # trailing ';' keeps the return value out of the cell output

Queried address: 1DMcUNysqADX5hJ3naTsEM64GnSpe3nf7y
Association of queried address: ze3ws3ckYylbZwpXj6xNRXSBeRkenIzY
                           Output_Addresses  Value_send_to_Outp  \
0        1FoWyxwPXuj4C6abqwhjDWdz6D4PZgYRjA                 888   
1        1PzCxZWHQBYkevm3khCfqNUPgc4R8Hqhmc                 888   
2        1DMcUNysqADX5hJ3naTsEM64GnSpe3nf7y           500000000   
1753     1KXByq2NKKgjJPVHSs6uztrVM7LYVQkYWe             2000000   
1754     1KHRNxJz2sTdJuWHxHAxp8c21SAP8Sj5dr             2000000   
...                                     ...                 ...   
1860499  1CnGv7bA5LrD22paaxNJAWatzndv1fpzro            10466467   
1860509  1EQ98M9Y9ZzXrsptLZTbWZZURGrmKSWhmU             1046635   
1860515  1KK1yzW1UPxZRGshQhyjyrYD4cZ15NNdAU            10466258   
1860541  1Nf53LhGzj6cae5ZoGXnyoaeFemp8hFE6r            10465922   
1860547  39bcLK519ke6xQPjgNkJwiZEQQPCobUpRA            10465892   

         Value_rec_from_Outp  \
0                      50000   
1             