# import packages

In [1]:
import multiprocessing as mp
import os

import pandas as pd
from neo4j import GraphDatabase

# define connector

In [3]:
class Neo4jConnection:
    """Small wrapper around a Neo4j driver that runs each query in its own session.

    Errors while creating the driver or running a query are printed rather
    than raised, so callers have to check for a ``None`` result from ``query``.
    """

    def __init__(self, uri, user, pwd):
        """Create the driver for ``uri`` with ``auth=(user, pwd)``.

        If driver creation raises, the error is printed and no driver is
        stored; a later ``query`` call then fails its "Driver not initialized!"
        assertion.
        """
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)

    def close(self):
        """Close the underlying driver if one was created."""
        if self.__driver is not None:
            self.__driver.close()

    def query(self, query, db=None, parameters=None):
        """Run ``query`` in a fresh session and return all records as a list.

        Args:
            query: Cypher text to execute.
            db: Database name handed to the session. With ``None`` the session
                is opened without a ``database`` argument.
            parameters: Optional mapping of Cypher parameters (``$name``
                placeholders in ``query``). Passing values this way avoids
                building the query text through string formatting. Defaults to
                ``None``, which runs the query text as given.

        Returns:
            list | None: The records produced by the query, or ``None`` when
            opening the session or running the query raised (the error is
            printed, not re-raised).
        """
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try:
            session = self.__driver.session(database=db) if db is not None else self.__driver.session()
            # Materialise the result before the session is closed in `finally`.
            response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        finally:
            if session is not None:
                session.close()
        return response

# Connection settings can be supplied through the NEO4J_URI, NEO4J_USER and
# NEO4J_PASSWORD environment variables so credentials need not live in the
# notebook. The literals are the previous hard-coded values, kept only as
# fallbacks so existing runs behave the same.
# TODO: drop the password fallback (and rotate the credential) once every
# environment sets NEO4J_PASSWORD.
conn = Neo4jConnection(
    uri=os.environ.get("NEO4J_URI", "neo4j://127.0.0.1:7687"),
    user=os.environ.get("NEO4J_USER", "team"),
    pwd=os.environ.get("NEO4J_PASSWORD", "F0110wTh€M0n€y"),
)

# query terror addresses

In [4]:
# The code below returns a list of all addresses that are marked as terror addresses.
# The query takes around 10 minutes.
def returnTerrorAddresses():
    """Return the address of every ``Address`` node whose ``isTerror`` is true.

    Runs a single Cypher query through the module-level ``conn`` against the
    ``neo4j`` database.

    Returns:
        list: The ``a.address`` value of each matching node, in result order.

    Raises:
        RuntimeError: If ``conn.query`` returned ``None``, which is how it
            signals a failed query.
    """

    query_string = '''
    MATCH (a:Address {isTerror: True})
    Return a.address
    '''

    response = conn.query(query_string, db='neo4j')
    if response is None:
        # conn.query prints the underlying error and returns None on failure;
        # fail with a clear message instead of a TypeError from iterating None.
        raise RuntimeError("Query for terror addresses failed; see the error printed by conn.query.")
    return [record[0] for record in response]

# cluster wallets

In [5]:
# Cypher template used by the multi-input clustering functions.
# The %s placeholder is filled with an address via the % operator at the call
# sites. The query returns the distinct addresses of all Address nodes that
# have a SENDS relationship to a Transaction the given address also SENDS to.
# The "//where t.outDegree <= 1" line is a Cypher comment, i.e. that filter is
# currently disabled.
# NOTE(review): the address is interpolated into the query text rather than
# passed as a query parameter.
mihTemplate = '''
MATCH (:Address{address:"%s"})-[:SENDS]->(t:Transaction),
(walletMember:Address)-[:SENDS]->(t:Transaction)
//where t.outDegree <= 1
RETURN DISTINCT walletMember.address
'''

In [6]:
def multiInputClustering(address, walletAddresses):
    """Collect the wallet around ``address`` by following shared transactions.

    Starting from ``address``, repeatedly runs ``mihTemplate`` for every newly
    found address until no unknown address turns up. Addresses already present
    in ``walletAddresses`` are treated as known and are not expanded again.

    The search is iterative, so large wallets cannot hit Python's recursion
    limit, and every address is queried at most once.

    Args:
        address: Address to start the search from; it is always queried.
        walletAddresses: List of addresses already assigned to the wallet.
            Newly discovered addresses are appended to this list in place
            (each one once), so the caller's list grows as it did before.

    Returns:
        list: The unique addresses from ``walletAddresses`` plus all newly
        discovered ones, in first-seen order.
    """
    # dict keys act as an insertion-ordered set of everything known so far.
    known = dict.fromkeys(walletAddresses)
    expanded = set()
    pending = [address]
    while pending:
        current = pending.pop()
        if current in expanded:
            continue
        expanded.add(current)
        response = conn.query(mihTemplate % current, db='neo4j')
        # conn.query prints the error and returns None when a query fails;
        # such a lookup simply contributes no addresses.
        for record in response or []:
            member = record[0]
            if member in known:
                continue
            known[member] = None
            walletAddresses.append(member)
            pending.append(member)
    return list(known)

In [10]:

# Same logic as Jochen's code. However, extremely large bitcoin wallets are excluded so far because
# they take too long to be clustered. Example: address 13iQsrwBYdrLpnitG5EV79o3PeHjH8XUBc has more than 137k
# addresses in its wallet according to walletexplorer.com.
# What should we do with them? I tried to solve it with multiprocessing, but that didn't help.
def iterMultiInputClustering(address, walletIndex, maxWalletSize=1000):
    """Iteratively cluster the wallet around ``address``.

    Runs ``mihTemplate`` for ``address`` and then for every address found,
    appending unseen addresses to the wallet until all members were expanded.

    Args:
        address: Address the wallet is built around.
        walletIndex: Number appended to "Terror-wallet ID" to form the label.
        maxWalletSize: Give up once the wallet holds more than this many
            addresses. Defaults to 1000, the previously hard-coded limit.

    Returns:
        list: ``[addresses, "Terror-wallet ID<walletIndex>"]`` where
        ``addresses`` is the list of wallet members in discovery order, or
        ``[address, "Too many results"]`` when the wallet grew beyond
        ``maxWalletSize``. Note that in the second case the first element is
        the address string itself, not a list.
    """
    walletLabel = "Terror-wallet ID" + str(walletIndex)
    response = conn.query(mihTemplate % address, db='neo4j')
    # conn.query prints the error and returns None when a query fails; such a
    # lookup contributes no addresses.
    members = list(dict.fromkeys(record[0] for record in response or []))
    seen = set(members)  # O(1) membership checks instead of scanning the list

    position = 0
    # `members` grows while it is walked, so it doubles as the work queue.
    while position < len(members):
        if len(members) > maxWalletSize:
            return [address, "Too many results"]
        current = members[position]
        position += 1
        if current == address:
            # Already expanded by the initial query above.
            continue
        response = conn.query(mihTemplate % current, db='neo4j')
        for record in response or []:
            candidate = record[0]
            if candidate not in seen:
                seen.add(candidate)
                members.append(candidate)
    return [members, walletLabel]



#walletIndex = 1
#wallets = iterMultiInputClustering("13iQsrwBYdrLpnitG5EV79o3PeHjH8XUBc", walletIndex)
#1EYya5dfNvuYDwpeboGKBtkXzJcEHMCQXR
#wallets


In [12]:
def callInputClustering():
    """Cluster the wallet of every terror address into one flat list.

    Fetches the terror addresses with ``returnTerrorAddresses`` and calls
    ``iterMultiInputClustering`` for each of them, numbering the wallets from 1
    in the order the addresses are returned.

    Returns:
        list: The concatenation of all per-address results, i.e. the two
        elements returned for the first address, then the two for the second,
        and so on.
    """
    # Sample addresses that were used instead of the full list during testing:
    # ["1EYya5dfNvuYDwpeboGKBtkXzJcEHMCQXR", "13iQsrwBYdrLpnitG5EV79o3PeHjH8XUBc", "1MMaU5nTrFdPZotfwdbv1wWnFjLCTFbpPY", "17QAWGVpFV4gZ25NQug46e5mBho4uDP6MD"]
    clusters = []
    for walletNumber, terrorAddress in enumerate(returnTerrorAddresses(), start=1):
        clusters.extend(iterMultiInputClustering(terrorAddress, walletNumber))
    return clusters

In [13]:
# Run the clustering for all terror addresses and keep the flat result list.
# NOTE(review): the name `input` shadows Python's built-in input() from here on.
input = callInputClustering()

def flagAdditionalTerrorAdresses(input):
    """Flag every clustered address in the database with its wallet label.

    For each address the query sets ``isTerrorMultiInput = True`` and
    ``terrorWallet`` to the wallet label on the matching ``Address`` node.

    Args:
        input: Flat list of alternating entries ``[addresses, walletID,
            addresses, walletID, ...]`` as built by ``callInputClustering``.
            An entry whose ``addresses`` part is a plain string (the shape
            ``iterMultiInputClustering`` returns together with
            "Too many results") is skipped; iterating it would otherwise issue
            one query per character of that address.

    Returns:
        None
    """
    query = """
    MATCH (a:Address {address: "%s"})
    set a.isTerrorMultiInput = True,
    a.terrorWallet = "%s"
    Return a.address
    """
    # Even positions hold the addresses, odd positions the matching wallet label.
    for addresses, walletID in zip(input[0::2], input[1::2]):
        if isinstance(addresses, str):
            # Oversized wallet that was not clustered: nothing to flag.
            continue
        for address in addresses:
            conn.query(query % (address, walletID), db='neo4j')
# Write the multi-input flags and wallet labels for the clustered addresses to the database.
flagAdditionalTerrorAdresses(input)



In [None]:
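# One-time change address example: 3CJNYYeJa4GMN19nLx4rdUjW5hzXXKR8xB
# Work in progress: attempt to recreate the one-time change address heuristic in Cypher.
# The drafts below are kept as bare strings and are not sent to the database from this cell.
# WITH * WHERE result = 0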
"""
Match (i:Address {isTerrorMultiInput: True})
//There is no address among the outputs that also appears in the inputs (self-change address);
CALL{
    with i
    Match (i:Address ) -[:SENDS]-> (t:Transaction)-[:RECEIVES]->(o:Address)
    with i.address as inputAddresses,o.address as outputAddresses
    return 
    sum(case when inputAddresses in outputAddresses then 1 else 0 end) as result
}
WITH * WHERE result = 0
return i.address
limit 5

"""
""" 
Match (o:Address)
//There is no address among the outputs that also appears in the inputs (self-change address);
CALL{
    with o
    Match (i:address) -[:SENDS]-> (t:Transaction)-[:RECEIVES]->(o)
    with i.address as inputAddresses,o.address as outputAddresses
    return 
    sum(case when inputAddresses in outputAddresses then 1 else 0 end) as frequencyInOutput
}
CALL{
    //Not solved yet
    //This is the first appearance of address O
    //(4) The output addresses other than O do not satisfy condition above.
    with o
    Match   (o)<-[:Receives]-(t:Transaction),
            (a:address)<-[:Receives]-(t)
    return count(t.txid) as firstAppearance, count(a.address) as anotherTransaction
}
WITH * WHERE frequencyInOutput = 0 and firstAppearance = 1 //and anotherTransaction > 1
return o.address
limit 5

//The transaction t is not coin generation; Not finished yet but shouldn't be a problem
Match   (o:Address {address: "112cXA686grivLWLurd1AuAMAghP8hcdjJ"})<-[:RECEIVES]-(t:Transaction),
            (a:Address)<-[:RECEIVES]-(t)
return count(t.txid) as firstAppearance, count(a.address) as anotherTransaction
limit 10

Match (b:Block {hash: "000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f"})
return b
limit 5
"""