# install packages

In [31]:
pip install neo4j

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\info\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


In [32]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\info\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


In [33]:
from neo4j import GraphDatabase
import multiprocessing as mp
import pandas as pd
import os
import pickle

# define connector

In [34]:
class Neo4jConnection:
    """Small wrapper around a neo4j driver that runs a query and returns its records as a list.

    The instance can also be used as a context manager; leaving the ``with`` block
    calls ``close()``.
    """

    def __init__(self, uri, user, pwd):
        """Create the driver for ``uri`` using ``(user, pwd)`` as authentication.

        A failure while creating the driver is printed, not raised; the driver then
        stays ``None`` and ``query`` fails its "Driver not initialized!" assertion.
        """
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)

    def __enter__(self):
        """Return the connection itself so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver when the ``with`` block ends; exceptions are not suppressed."""
        self.close()

    def close(self):
        """Close the underlying driver if one was created."""
        if self.__driver is not None:
            self.__driver.close()

    def query(self, query, db=None, parameters=None):
        """Run ``query`` in a fresh session and return all records as a list.

        Args:
            query: The Cypher query text.
            db: Optional database name; when ``None`` the session is opened without
                a ``database`` argument.
            parameters: Optional dict of query parameters, passed to ``session.run``
                as its second argument. Prefer this over building the query text
                with string formatting.

        Returns:
            list: The records returned by the query, or ``None`` when the query
            failed (the error is printed instead of raised).
        """
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try:
            session = self.__driver.session(database=db) if db is not None else self.__driver.session()
            if parameters is None:
                # Same call as before for callers that do not use parameters.
                result = session.run(query)
            else:
                result = session.run(query, parameters)
            response = list(result)
        except Exception as e:
            print("Query failed:", e)
        finally:
            # Always release the session, also when the query raised.
            if session is not None:
                session.close()
        return response

# Connection settings can be overridden with the NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD
# environment variables; the previous values remain as fallbacks so existing setups keep working.
# FIXME(security): the fallback password is still stored in the notebook. Rotate it and
# remove the default once every environment provides NEO4J_PASSWORD.
conn = Neo4jConnection(
    uri=os.environ.get("NEO4J_URI", "neo4j://127.0.0.1:7687"),
    user=os.environ.get("NEO4J_USER", "team"),
    pwd=os.environ.get("NEO4J_PASSWORD", "F0110wTh€M0n€y"),
)

# query terror addresses

In [35]:
# The function below returns a list of all addresses that are marked as terror addresses.
# The query takes around 10 minutes.

def returnTerrorAddresses():
    """Return the ``address`` of every ``Address`` node whose ``isTerror`` flag is set.

    The query is run against the ``neo4j`` database through the module-level ``conn``.

    Returns:
        list: The first column (``a.address``) of every returned record.

    Raises:
        RuntimeError: If ``conn.query`` returned ``None``, which it does after a
            failed query. Raising here avoids an opaque ``TypeError`` further down.
    """

    query_string = '''
    MATCH (a:Address {isTerror: True})
    Return a.address
    '''

    response = conn.query(query_string, db='neo4j')
    if response is None:
        raise RuntimeError("Terror address query failed; see the 'Query failed' message above.")
    terrorAddresses = [record[0] for record in response]
    return terrorAddresses

In [36]:
# comment out entire block if old terrorAddressList saved as pickle should be used
def createTerrorAddressList():
    """Query the terror addresses and store them in ``terrorAddressList.pickle``.

    The query runs before the file is opened, so a failing query leaves no
    (empty) pickle file behind.
    """
    fetchedAddresses = returnTerrorAddresses()

    # Persist the result in the current working directory.
    with open('terrorAddressList.pickle', 'wb') as sink:
        pickle.Pickler(sink).dump(fetchedAddresses)

In [37]:
# Use the cached terror address list if it exists; otherwise query the database and cache it.
if not os.path.exists('terrorAddressList.pickle'):
    createTerrorAddressList()

# NOTE: pickle.load can execute arbitrary code; only load pickle files written by this notebook.
# The `with` block closes the file handle again (the bare open() call left it open).
with open('terrorAddressList.pickle', 'rb') as pickleFile:
    terrorAddressList = pickle.load(pickleFile)

# remove unwanted addresses

In [38]:
# # allows blacklist of addresses in json format. json file retrieved with https://github.com/nicotom/walletexplorer_scrapy
# 
# def createBlacklist():
#   dirname = os.path.dirname(os.path.realpath('__file__'))
#   folder = os.path.join(dirname, 'exchange_addresses\\json\\')
#   
#   def flatten(t):
#       return [item for sublist in t for item in sublist]
#   
#   blacklistAddresses = []
#   
#   # iterate over collection of exchange addresses in json format to create blacklist
#   # attention, lists are not complete yet!
#   
#   for file in os.listdir(folder):
#       df = pd.read_json(os.path.join(folder, file))
#       blacklistAddresses.append(df['addresses'].tolist())
#       continue
#   
#   flatten(flatten(blacklistAddresses))

In [39]:
# allows blacklist of addresses in csv format
blacklistAddresses = []
def createBlacklist():
    """Build the exchange-address blacklist from the CSV files and pickle it.

    Every file in ``exchange_addresses/csv`` (relative to the current working
    directory) is read with pandas and its ``hashAdd`` column is collected into one
    flat list. The list is written to ``blacklistAddresses.pickle`` and mirrored
    into the module-level ``blacklistAddresses``.

    Attention: the exchange address lists are not complete yet!

    Returns:
        list: The flat list of blacklisted addresses.
    """
    # realpath('__file__') resolves the literal name '__file__' against the current
    # working directory, so `dirname` is the working directory of the notebook.
    dirname = os.path.dirname(os.path.realpath('__file__'))
    # Joining the components separately keeps the path portable across operating systems.
    folder = os.path.join(dirname, 'exchange_addresses', 'csv')

    # Collect into a local flat list. Previously the per-file lists were appended as
    # nested lists and the flattened result was thrown away.
    collected = []
    for file in sorted(os.listdir(folder)):
        df = pd.read_csv(os.path.join(folder, file), header=0)
        collected.extend(df['hashAdd'].tolist())

    # Replace the content in place so that re-running does not accumulate duplicates.
    blacklistAddresses[:] = collected

    # export blacklistAddresses to file
    with open('blacklistAddresses.pickle', 'wb') as export:
        pickle.dump(collected, export)

    return collected

In [40]:
# Use the cached blacklist if it exists; otherwise build it from the CSV files and cache it.
if not os.path.exists('blacklistAddresses.pickle'):
    createBlacklist()

# NOTE: pickle.load can execute arbitrary code; only load pickle files written by this notebook.
# The `with` block closes the file handle again (the bare open() call left it open).
with open('blacklistAddresses.pickle', 'rb') as pickleFile:
    blacklistAddresses = pickle.load(pickleFile)

In [41]:
# to be used for removal of exchange addresses
interestingAddresses = []
def addressCleanUp(addressList, blacklist):
    """Return the addresses of ``addressList`` that are not on ``blacklist``.

    Args:
        addressList: Iterable of addresses to filter; the order is preserved.
        blacklist: Iterable of addresses to drop. Entries that are themselves
            lists/tuples/sets are flattened one level, so a nested blacklist
            (one list per source file) filters correctly as well.

    Returns:
        list: A new list with the remaining addresses. The module-level
        ``interestingAddresses`` is updated in place to the same content.
    """
    # A set gives constant-time membership tests instead of scanning the list per address.
    blocked = set()
    for entry in blacklist:
        if isinstance(entry, (list, tuple, set, frozenset)):
            blocked.update(entry)
        else:
            blocked.add(entry)

    kept = [address for address in addressList if address not in blocked]

    # Replace instead of append: repeated calls no longer accumulate earlier results.
    interestingAddresses[:] = kept
    return kept

In [42]:
def createAddressesToClusterList():
    """Filter the terror addresses against the blacklist and pickle the result.

    Reads the module-level ``terrorAddressList`` and ``blacklistAddresses`` and
    writes the filtered list to ``addressesToCluster.pickle``.
    """
    remainingAddresses = addressCleanUp(terrorAddressList, blacklistAddresses)

    # Persist the filtered list in the current working directory.
    with open('addressesToCluster.pickle', 'wb') as sink:
        pickle.Pickler(sink).dump(remainingAddresses)


In [43]:
# Load addressesToCluster from its cache file; create the file first if it does not exist.
if not os.path.exists('addressesToCluster.pickle'):
    createAddressesToClusterList()

# NOTE: pickle.load can execute arbitrary code; only load pickle files written by this notebook.
# The `with` block closes the file handle again (the bare open() call left it open).
with open('addressesToCluster.pickle', 'rb') as pickleFile:
    addressesToCluster = pickle.load(pickleFile)


# cluster wallets

In [5]:
# Cypher query template used by the clustering functions below ("mih" presumably
# stands for multi-input heuristic - confirm).
# The single %s placeholder is filled with an address via `mihTemplate % address`.
# The query returns the distinct addresses of all Address nodes that have a SENDS
# relationship to a Transaction the given address also SENDS to.
# The `//where ...` line is a Cypher comment, i.e. the outDegree filter is disabled.
# NOTE(review): the address is inserted by string formatting, not as a query parameter.
mihTemplate = '''
MATCH (:Address{address:"%s"})-[:SENDS]->(t:Transaction),
(walletMember:Address)-[:SENDS]->(t:Transaction)
//where t.outDegree <= 1
RETURN DISTINCT walletMember.address
'''

In [6]:
def multiInputClustering(address, walletAddresses):
    response = conn.query(mihTemplate % address, db='neo4j')
    newAddresses = [r[0] for r in response]
    newAddresses = [a for a in newAddresses if a not in walletAddresses]
    walletAddresses += newAddresses
    for a in newAddresses:
        walletAddresses += multiInputClustering(a, walletAddresses)
    return list(set(walletAddresses))

In [10]:
# Same logic as Jochen's code. However, extremely large bitcoin wallets are excluded so far because
# they take too long to cluster. Example: address 13iQsrwBYdrLpnitG5EV79o3PeHjH8XUBc has more than 137k
# addresses in its wallet according to walletexplorer.com.
# TODO: decide what to do with them. I tried to solve it with multiprocessing, but that didn't help.

def iterMultiInputClustering(address, walletIndex, maxWalletSize=1000):
    """Iteratively cluster the wallet of ``address`` with the ``mihTemplate`` query.

    The addresses returned for ``address`` form the initial wallet. Every wallet
    address is then queried in turn and addresses not seen before are appended,
    until the whole list has been processed or the wallet grows too large.

    Args:
        address: The address to start from.
        walletIndex: Number appended to ``"Terror-wallet ID"`` to label the wallet.
        maxWalletSize: The clustering is aborted as soon as the wallet holds more
            than this many addresses (default 1000, the previous fixed limit).

    Returns:
        list: ``[walletAddressList, walletLabel]`` on success, or
        ``[address, "Too many results"]`` when the size limit was exceeded.

    Raises:
        RuntimeError: If ``conn.query`` returned ``None`` (failed query).
    """
    walletString = "Terror-wallet ID"
    response = conn.query(mihTemplate % address, db='neo4j')
    if response is None:
        raise RuntimeError("Clustering query failed for address %s" % address)
    members = [r[0] for r in response]
    # Set copy of `members` for constant-time "already known" checks.
    seen = set(members)
    walletAddresses = [members, walletString + str(walletIndex)]
    # `members` grows while it is iterated: appended addresses are visited later in
    # this same loop, which makes the list act as the work queue.
    for walletAddress in members:
        if len(members) > maxWalletSize:
            return [address, "Too many results"]
        response = conn.query(mihTemplate % walletAddress, db='neo4j')
        if response is None:
            raise RuntimeError("Clustering query failed for address %s" % walletAddress)
        for record in response:
            candidate = record[0]
            if candidate not in seen:
                seen.add(candidate)
                members.append(candidate)
    return walletAddresses

#walletIndex = 1
#wallets = iterMultiInputClustering("13iQsrwBYdrLpnitG5EV79o3PeHjH8XUBc", walletIndex)
#1EYya5dfNvuYDwpeboGKBtkXzJcEHMCQXR
#wallets


In [12]:
def callInputClustering():
    """Run ``iterMultiInputClustering`` for every entry of ``addressesToCluster``.

    Wallet indices start at 1 and follow the position in the list.

    Returns:
        list: The results of all calls concatenated into one flat list, i.e. the
        two elements of each call's result follow each other directly.
    """
    # Example addresses used during testing:
    #["1EYya5dfNvuYDwpeboGKBtkXzJcEHMCQXR", "13iQsrwBYdrLpnitG5EV79o3PeHjH8XUBc", "1MMaU5nTrFdPZotfwdbv1wWnFjLCTFbpPY", "17QAWGVpFV4gZ25NQug46e5mBho4uDP6MD"]
    clusters = []
    for walletNumber, seedAddress in enumerate(addressesToCluster, start=1):
        clusters.extend(iterMultiInputClustering(seedAddress, walletNumber))
    return clusters

In [13]:
# input = callInputClustering()
# 
# def flagAdditionalTerrorAdresses(input):
#     query = """
#     MATCH (a:Address {address: "%s"})
#     set a.isTerrorMultiInput = True,
#     a.terrorWallet = "%s"
#     Return a.address
#     """
#     indexWallets = 0
#     while indexWallets < len(input):
#         for address in input[indexWallets]:
#             walletID = input[indexWallets+1]
#             conn.query(query % (address, walletID), db='neo4j')
#         indexWallets += 2
# # flagAdditionalTerrorAdresses(input)
# 
# 