# import packages

In [2]:
from neo4j import GraphDatabase
import multiprocessing as mp
import pandas as pd
import os
import pickle
import time

# define connector

In [3]:
class Neo4jConnection:
    """Small convenience wrapper around a Neo4j driver.

    The driver is created in the constructor. If that fails, the error is
    printed and the instance is left without a driver, in which case
    ``query`` fails its "Driver not initialized!" assertion.
    """

    def __init__(self, uri, user, pwd):
        """Create the driver for ``uri`` using ``(user, pwd)`` as credentials."""
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            # Best effort: report the problem and leave the driver unset.
            print("Failed to create the driver:", e)

    def close(self):
        """Close the driver if one was created."""
        if self.__driver is not None:
            self.__driver.close()

    def query(self, query, db=None, parameters=None):
        """Run one Cypher statement and return all of its records as a list.

        Parameters
        ----------
        query : str
            The Cypher statement. It may contain ``$name`` placeholders when
            ``parameters`` is given.
        db : str, optional
            Database to open the session on; when omitted the session is
            opened without a ``database`` argument.
        parameters : dict, optional
            Values for the ``$name`` placeholders. Prefer this over formatting
            values into the query text, which breaks on quotes and allows
            Cypher injection.

        Returns
        -------
        list or None
            The records of the result, or ``None`` if running the query
            raised (the error is printed, not re-raised). Callers that iterate
            over the result must handle ``None``.
        """
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try:
            if db is not None:
                session = self.__driver.session(database=db)
            else:
                session = self.__driver.session()
            # Keep the exact one-argument call when no parameters are supplied.
            if parameters is None:
                result = session.run(query)
            else:
                result = session.run(query, parameters)
            # Materialize the records before the session is closed.
            response = list(result)
        except Exception as e:
            print("Query failed:", e)
        finally:
            if session is not None:
                session.close()
        return response

# Connection settings are read from the environment so that no secret is stored
# in the notebook. NEO4J_PASSWORD must be set before this cell runs (a missing
# variable raises KeyError); URI and user fall back to the local defaults.
conn = Neo4jConnection(
    uri=os.environ.get("NEO4J_URI", "neo4j://127.0.0.1:7687"),
    user=os.environ.get("NEO4J_USER", "team"),
    pwd=os.environ["NEO4J_PASSWORD"],
)

# cluster wallets

In [4]:
# Multi-input heuristic for a single address: return every Address node that
# SENDS into a Transaction which the given address also SENDS into.
# The one %s placeholder takes the address string: mihTemplate % address
mihTemplate = "\n".join([
    '',
    'MATCH (:Address{address:"%s"})-[:SENDS]->(t:Transaction),',
    '(walletMember:Address)-[:SENDS]->(t:Transaction)',
    'RETURN DISTINCT walletMember',
    '',
])

In [5]:
# Batched form of the multi-input heuristic: the same pattern as the
# single-address query, but matching ten start addresses at once.
# Fill the placeholders {0}..{9} with str.format and exactly ten addresses.
mihWhere = (
    'MATCH (a:Address)-[:SENDS]->(t:Transaction), (walletMember:Address)-[:SENDS]->(t:Transaction) \n'
    'where a.address in ["{0}","{1}","{2}","{3}","{4}","{5}","{6}","{7}","{8}","{9}"]\n'
    'RETURN DISTINCT walletMember'
)

'MATCH (a:Address)-[:SENDS]->(t:Transaction), (walletMember:Address)-[:SENDS]->(t:Transaction) \nwhere a.address in ["{0}","{1}","{2}","{3}","{4}","{5}","{6}","{7}","{8}","{9}"]\nRETURN DISTINCT walletMember'

In [6]:
def updateWalletAddresses(address, walletName):
    """Build (but do not run) the APOC statement that labels a wallet.

    The returned Cypher text calls ``apoc.periodic.iterate`` with two inner
    statements: the first returns every ``walletMember`` address that sends
    into a transaction ``address`` also sends into, the second sets
    ``walletMember.terrorWallet`` to ``walletName``. Both values are inserted
    into the text with ``%s`` formatting, without any escaping.

    Parameters
    ----------
    address : str
        Address whose co-spending addresses are selected.
    walletName : str
        Value written to the ``terrorWallet`` property.

    Returns
    -------
    str
        The complete ``CALL apoc.periodic.iterate(...)`` statement.
    """
    # Inner statement 1: select the addresses to update.
    selectMembers = (
        'MATCH (a:Address {address: "%s"})-[:SENDS]->(t:Transaction), '
        '(walletMember:Address)-[:SENDS]->(t:Transaction) RETURN  walletMember'
    ) % (address,)

    # Inner statement 2: applied to each selected address.
    tagMember = 'set walletMember.terrorWallet = "%s"' % (walletName,)

    return (
        "CALL apoc.periodic.iterate( '" + selectMembers + "',\n"
        "  '" + tagMember + "', {batchSize:1000, parallel:true})"
    )


In [13]:
def iterMultiInputClustering(address):
    """Cluster a wallet with the multi-input heuristic, starting at ``address``.

    Every address that sends into the same transaction as an already known
    address is added to the wallet and is later expanded itself, until no
    unexpanded address is left.

    Known addresses are the keys of a dict (the value is a dummy ``1``), so
    duplicates in a response collapse on insertion without comparing against
    a list. While at least ten addresses are waiting to be expanded they are
    queried ten at a time through ``mihWhere``; the remainder is queried one
    by one through ``mihTemplate``.

    Parameters
    ----------
    address : str
        Seed address of the wallet.

    Returns
    -------
    dict
        All addresses of the cluster (including ``address``) as keys.

    Raises
    ------
    RuntimeError
        If a query fails, i.e. ``conn.query`` returns ``None``. Continuing
        would silently produce an incomplete cluster.
    """
    batchSize = 10  # must equal the number of placeholders in mihWhere

    walletAddresses = {address: 1}
    # Addresses in discovery order. Position i is the next address to expand.
    # Kept next to the dict so the key list is not rebuilt on every iteration.
    discovered = [address]

    def absorb(cypher):
        """Run ``cypher`` and record every address not seen before."""
        response = conn.query(cypher, db='neo4j')
        if response is None:
            raise RuntimeError("Clustering aborted: query failed: " + cypher)
        for record in response:
            # Reads the driver object's underscore-prefixed property dict.
            member = record[0]._properties["address"]
            if member not in walletAddresses:
                walletAddresses[member] = 1
                discovered.append(member)

    # Expand the seed itself (position 0).
    absorb(mihTemplate % address)

    i = 1
    while i < len(discovered):
        if len(discovered) - i >= batchSize:
            absorb(mihWhere.format(*discovered[i:i + batchSize]))
            # Move past the batch; without this the same ten addresses
            # would be queried forever.
            i += batchSize
        else:
            absorb(mihTemplate % discovered[i])
            i += 1

    print("Updating ..." + str(len(walletAddresses)))

    # Writing the wallet label back to the database (see
    # updateWalletAddresses) is not done here.
    return walletAddresses

# Run the clustering for one seed address; the call prints the number of
# addresses found ("Updating ...<count>").
wallets = iterMultiInputClustering("13iQsrwBYdrLpnitG5EV79o3PeHjH8XUBc")

# Output noted for seed 13iQsrwBYdrLpnitG5EV79o3PeHjH8XUBc. The string below is
# only a note kept in the cell; it is not assigned to anything.
"""137373
Execution time Query: 3572.6374530792236 seconds
Execution time inWallet: 158.30259037017822 seconds
Execution time IF: 0.022434473037719727 seconds   | Going into statement:  2  times."""

137373
Execution time Query: 3572.6374530792236 seconds
Execution time inWallet: 158.30259037017822 seconds
Execution time IF: 0.022434473037719727 seconds   | Going into statement:  2  times.
Updating ...137373


In [None]:
# additional remarks and examples
#print(len(wallets))
#1EYya5dfNvuYDwpeboGKBtkXzJcEHMCQXR '1PeSDEMzi7nj1ah4YFcgnRmijWpgQqP3Yp' '1P963yWMBFkUouU2Me7cQ6136orZDD4gTf' '1KFiRjjvE4rtheuEYGo9VeDDBvGgmm7nRg' exchanges: Btc38.com-1CELa15H4DMzHtHnuz7LCpSFgFWf61Ra6A, 
# QuadrigaCX.com-1LQF9Suqgm4YtxY6kriiE8DJftNTPTqwAm, CoinHako.com- 3PpSAGEGfA9e995bpCkAFdKaw3fMmo8Eyw, MaiCoin.com - 1Lfktsua4x25UcsqDeuXUrXZq3jSoPpJ1b, Hashnest - 1D7JStLYKJ2ma6yfH7a7DXSom5ZPfyfNM3
#wallets
#no batching: 1EYya5dfNvuYDwpeboGKBtkXzJcEHMCQXR - 7sec, 13iQsrwBYdrLpnitG5EV79o3PeHjH8XUBc - cancelled after 4 hours, 
#batching with 6: 1EYya5dfNvuYDwpeboGKBtkXzJcEHMCQXR - 1.3sec, 13iQsrwBYdrLpnitG5EV79o3PeHjH8XUBc - cancelled after 6 hours