# INSTALL: packages

In [1]:
pip install nbformat

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\info\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


In [2]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\info\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


In [3]:
import pandas as pd
import os
import pickle
import WalletClustering_neo4jConnect

In [4]:
# Reuse the connection object exposed by the project-local
# WalletClustering_neo4jConnect module; the query function below calls conn.query(...).
conn = WalletClustering_neo4jConnect.conn

# DEFINE: other methods

These helper methods were used in older versions of this code. They are kept for consistency with other notebooks.

In [5]:
# Flatten one level of nesting: a list of lists becomes a single flat list.
def flatten(list):
    """Return the items of every sub-list in ``list`` as one flat list.

    Only the outermost level of nesting is removed and the original item
    order is kept.
    """
    flattened = []
    for sublist in list:
        for item in sublist:
            flattened.append(item)
    return flattened

# DEFINE: get interesting addresses excluding blacklisted addresses

## create output directory

In [6]:
# Create the directory that receives the pickle exports written below.
# exist_ok=True makes re-running this cell a no-op when the directory already
# exists, while genuine failures (e.g. missing permissions) are raised instead
# of being silently swallowed by a bare except.
os.makedirs('output', exist_ok=True)

## query terror addresses

In [7]:
# Returns a list of all addresses that are marked as terror addresses.
# Query takes around 10 min
def returnTerrorAddresses():
    """Return ``a.address`` for every ``Address`` node with ``isTerror: True``.

    Runs a single Cypher query through the module-level ``conn`` against the
    ``neo4j`` database and collects the first column of every returned record.
    """
    query_string = '''
    MATCH (a:Address {isTerror: True})
    Return a.address
    '''

    terrorAddresses = []
    for record in conn.query(query_string, db='neo4j'):
        # the query returns exactly one column, the address
        terrorAddresses.append(record[0])
    return terrorAddresses

In [8]:
# Comment out this entire block if the previously pickled terrorAddressList should be used instead.
def createTerrorAddressList():
    """Query the terror addresses and pickle the resulting list.

    The list returned by ``returnTerrorAddresses()`` is written to the
    terrorAddressList pickle file; nothing is returned.
    """
    queriedAddresses = returnTerrorAddresses()

    # persist the query result so later runs can skip the query
    with open('output\\terrorAddressList.pickle', 'wb') as pickleFile:
        pickle.dump(queriedAddresses, pickleFile)

## create blacklist from csv

In [9]:
# allows blacklist of addresses in csv format
def createBlacklistCSV():
    """Create the blacklist pickle from ``Exchanges_full_detailed.csv``.

    When the EntityAddressBitcoin directory exists, the CSV columns
    ``hashAdd`` and ``association`` are read and exposed as ``address`` and
    ``association``. When it does not exist, an empty DataFrame with those two
    columns is used. Either way the DataFrame is pickled to the
    blacklistAddresses pickle file; nothing is returned.

    The CSV frame is used directly instead of being appended to an empty
    DataFrame: ``DataFrame.append`` was removed in pandas 2.0 and appending to
    an empty frame only ever produced a re-indexed copy of the CSV data.
    """
    # '__file__' is a string literal here, so this resolves to the current
    # working directory of the notebook kernel.
    dirname = os.path.dirname(os.path.realpath('__file__'))

    blacklistAddresses = pd.DataFrame(columns=['address', 'association'])

    if os.path.isdir('..\\EntityAddressBitcoin\\'):
        csvPath = os.path.join(os.path.join(dirname, '..\\EntityAddressBitcoin\\'), 'Exchanges_full_detailed.csv')
        df = pd.read_csv(csvPath, usecols=['association', 'hashAdd'])[['hashAdd', 'association']]
        df.columns = ['address', 'association']
        blacklistAddresses = df.reset_index(drop=True)

    # export blacklistAddresses to file
    with open('output\\blacklistAddresses.pickle', 'wb') as export:
        pickle.dump(blacklistAddresses, export)

## create blacklist from json

In [10]:
# allows blacklist of addresses in json format
def _readScrapedAddresses(categoryFolder):
    """Read every ``scraped_addresses.json`` found one level below ``categoryFolder``.

    Each sub-folder name is printed as progress output. The association label
    of a sub-folder is the part of its name after the first underscore; a name
    without an underscore raises ``IndexError``.

    Returns a list with one DataFrame (columns ``address``, ``association``)
    per sub-folder.
    """
    frames = []
    for subfolder in os.listdir(categoryFolder):
        print(subfolder)
        df = pd.read_json(os.path.join(categoryFolder, subfolder, 'scraped_addresses.json'))
        df.columns = ['address']
        df['association'] = subfolder.split("_", 1)[1]
        frames.append(df)
    return frames


def createBlacklist():
    """Build the full blacklist and pickle it to the blacklistAddresses pickle file.

    Steps:
      1. ``createBlacklistCSV()`` writes the CSV-based blacklist pickle.
      2. That pickle is loaded (or an empty frame is used if it is missing).
      3. Scraped JSON address collections from the ``exchange``, ``mixer`` and
         ``giant_wallet`` output folders of WalletExplorerScraper are added,
         for each folder that exists.
      4. Associations are lower-cased and duplicate addresses are dropped,
         keeping the last occurrence.

    All frames are combined with a single ``pd.concat`` call because
    ``DataFrame.append`` was removed in pandas 2.0. Nothing is returned.
    """
    # first create CSV blacklist, then append JSON
    createBlacklistCSV()

    if os.path.exists('output\\blacklistAddresses.pickle'):
        # with-block closes the file handle right after loading
        with open('output\\blacklistAddresses.pickle', 'rb') as source:
            blacklistAddresses = pickle.load(source)
    else:
        blacklistAddresses = pd.DataFrame(columns=['address', 'association'])

    # '__file__' is a string literal here, so this resolves to the current
    # working directory of the notebook kernel.
    dirname = os.path.dirname(os.path.realpath('__file__'))

    # iterate over the scraped address collections in json format; the order
    # matters because drop_duplicates below keeps the last occurrence
    frames = [blacklistAddresses]
    for category in ('exchange', 'mixer', 'giant_wallet'):
        relativeFolder = '..\\WalletExplorerScraper\\Output\\' + category + '\\'
        if os.path.isdir(relativeFolder):
            frames.extend(_readScrapedAddresses(os.path.join(dirname, relativeFolder)))

    # empty frames contribute no rows; skipping them avoids concatenating
    # empty entries
    frames = [frame for frame in frames if not frame.empty]
    if frames:
        blacklistAddresses = pd.concat(frames, ignore_index=True)

    blacklistAddresses['association'] = blacklistAddresses['association'].str.lower()
    blacklistAddresses = blacklistAddresses.drop_duplicates('address', keep='last')

    # export blacklistAddresses to file
    with open('output\\blacklistAddresses.pickle', 'wb') as export:
        pickle.dump(blacklistAddresses, export)

## create list of blacklisted associations

In [17]:
def createAssocBlacklist():
    """Create the pickled list of unique blacklisted association names.

    Combines the ``association`` column of the blacklistAddresses pickle with
    the ``otherService`` column of the blacklistAddressesOSMIH pickle and
    writes the de-duplicated names as a list to the assocBlacklist pickle
    file. Nothing is returned.

    The blacklistAddressesOSMIH pickle is not created in this part of the
    notebook and must already exist.

    Duplicates are removed with ``dict.fromkeys`` so the first-seen order is
    kept and the pickled list is identical between runs (a ``set`` gives an
    arbitrary, run-dependent order). All files are opened in with-blocks so
    their handles are closed.
    """
    with open('output\\blacklistAddresses.pickle', 'rb') as source:
        assocBlacklistDf = pickle.load(source)
    assocBlacklistDf = assocBlacklistDf.drop_duplicates('association', keep='first')
    assocBlacklist = assocBlacklistDf['association'].tolist()

    # list of other services addresses that have been clustered with MIH
    with open('output/blacklistAddressesOSMIH.pickle', 'rb') as source:
        otherServicesListDf = pickle.load(source)
    otherServicesListDf = otherServicesListDf.drop_duplicates('otherService', keep='last')
    otherServicesList = otherServicesListDf['otherService'].tolist()
    assocBlacklist.extend(otherServicesList)

    with open('output\\assocBlacklist.pickle', 'wb') as export:
        pickle.dump(list(dict.fromkeys(assocBlacklist)), export)


## remove blacklisted addresses from addresses of interest

In [12]:
# to be used for removal of exchange addresses
def addressCleanUp(addressList, blacklist):
    """Return the addresses from ``addressList`` that are not blacklisted.

    Parameters:
        addressList: iterable of addresses to filter.
        blacklist: DataFrame with an ``address`` column holding the
            blacklisted addresses.

    Returns:
        A list with the entries of ``addressList`` (original order, duplicates
        kept) that do not occur in ``blacklist['address']``.

    An address is removed only when it is exactly equal to a blacklisted
    address. The previous ``str.contains`` check treated each address as a
    regular expression and also matched substrings, and it rescanned the whole
    blacklist column once per address. A set gives exact matching with a
    single pass over the blacklist.
    """
    blacklisted = set(blacklist['address'])
    return [address for address in addressList if address not in blacklisted]

In [13]:
def createAddressesToClusterList():
    """Pickle the terror addresses that remain after blacklist removal.

    Reads the notebook-level variables ``terrorAddressList`` and
    ``blacklistAddresses``, so both must be defined before this is called.
    The filtered list is written to the addressesToCluster pickle file;
    nothing is returned.
    """
    # filter first, so no file is created if the clean-up fails
    remainingAddresses = addressCleanUp(terrorAddressList, blacklistAddresses)

    # save addressesToCluster to file
    with open('output\\addressesToCluster.pickle', 'wb') as pickleFile:
        pickle.dump(remainingAddresses, pickleFile)

# EXECUTE: get interesting addresses excluding blacklisted addresses

In [14]:
# use existing terrorAddressList if exists, otherwise query and pickle it first
if not os.path.exists('output\\terrorAddressList.pickle'):
    createTerrorAddressList()

# load inside a with-block so the file handle is closed right after reading
with open('output\\terrorAddressList.pickle', 'rb') as pickleFile:
    terrorAddressList = pickle.load(pickleFile)
print(terrorAddressList)

['12sDU3FyYJXc2oRzE6XXuuhVHCBJvaoCC8', '1348ThkNoDupq1bws95diMiL8haGs61K7M', '13iQsrwBYdrLpnitG5EV79o3PeHjH8XUBc', '13Pcmh4dKJE8Aqrhq4ZZwmM1sbKFcMQEEV', '15K9Zj1AU2hjT3ebZMtWqDsMv3fFxTNwpf', '15soXrE3NJBMkkQhrccXonTT9bpjpPvE67', '164fawNZVwsR5SamAJypvCMtkMx4Xv1B3f', '179bzhS4FY7qLDza9YjuorhWyXVVYZu2YH', '17QAWGVpFV4gZ25NQug46e5mBho4uDP6MD', '17UUXDzPGkMwWrabhtk7YCha88tSoua2Vr', '19D1iGzDr7FyAdiy3ZZdxMd6ttHj1kj6WW', '19XVEDZCGVMA9WCF1qUayxtnjUnyD7zDDQ', '1A7pDH1EdrkH9YZtsPnc8uzirBFnAN9Eay', '1BPf9qr7M5xUgNHUYtrQtEKvUKcyERzXao', '1C6hetVWVXZnS6P2BYBNu5Y1ZJ57JyXGac', '1DrhHEkv42JVwiDQNi28JFdSuiSGgPNXwP', '1EDcKCRypUTFoTZbxDWF9MBAT4W7XUGB32', '1EfmRn6Bp3cjrTBubaH8MzRRc2ikSjNGXw', '1EnX6BuJiGWydqXJT9BN5dSvfLg3QW4Mdz', '1EVTZmTMqZPMzGxsug9TXBtvPJZH8dXSCK', '1EYya5dfNvuYDwpeboGKBtkXzJcEHMCQXR', '1GALPyvUDDXqA6H2eHQ9Y1yidfQ6T1Drvn', '1GC2SjzCyCwxo1uxTi28oqn9L3mJj7bLPs', '1Gg25VzQkqCizXHNSNet4RoysLEe19su4s', '1JpSBaUwrZaEgmsYka7mzm9t3Z4syyaw7A', '1LhRW1msre1cFgT7fBY2BRrZ4ANMPwVj9u', '1Lm9BCDUKo

In [15]:
# use existing blacklist if exists, otherwise build and pickle it first
if not os.path.exists('output\\blacklistAddresses.pickle'):
    createBlacklist()

# load inside a with-block so the file handle is closed right after reading
with open('output\\blacklistAddresses.pickle', 'rb') as pickleFile:
    blacklistAddresses = pickle.load(pickleFile)
print(blacklistAddresses)

                                     address              association
1         1CrhRvGShwmQkrGcqFzDDwDDBiEhKpHAqF               anxpro.com
2         1AbH5pFqocUYVhRqPAtJruVr5DMAxEANmn               anxpro.com
4         1FM8vDXinj2N5LoTXX5J4MSGTCn2pPkqRm               anxpro.com
7         18g2RSp4MxzkyNkigpYDejP2jMgkJ2Po9A               anxpro.com
18        195rycdSmRf1tQDztryRkZWKaFgFwM1PPG              bittrex.com
...                                      ...                      ...
21606795  1JiTqsNvfjkGSYVHMKHpWbNwEfN2LUhjKM  unknown_large_wallet_03
21606796  1HoNkBpNPkcznGWe6nUeLqffeLQN1TwhFJ  unknown_large_wallet_03
21606797  1BmvAcQPLcFve9KpNigfF591PyQnZwNDyQ  unknown_large_wallet_03
21606798  1GNw9S6hSzVQ83gUkP36ypjyrH3d6YZRm7  unknown_large_wallet_03
21606799  1McAmDN95SGxaTMmymaN8QzzTGJADM3aZy  unknown_large_wallet_03

[13983352 rows x 2 columns]


In [18]:
# use existing assocBlacklist if exists, otherwise build and pickle it first
if not os.path.exists('output\\assocBlacklist.pickle'):
    createAssocBlacklist()

# load inside a with-block so the file handle is closed right after reading
with open('output\\assocBlacklist.pickle', 'rb') as pickleFile:
    assocBlacklist = pickle.load(pickleFile)
print(assocBlacklist)

['unknown_large_wallet_01', 'vircurex.com', 'slushpool.com', 'ccedk.com', 'kano.is', 'germanplazamarket', 'coinjar.com', 'ghash.io', 'localbitcoins.com', 'simplecoin.cz', 'btcoracle.com', 'binance.com', 'bit-x.com', 'satoshidice.com', '999dice.com', 'nitrogensports.eu', 'vaultoro.com', 'cryptopay.me', 'btradeaustralia.com', 'cryptonator.com', 'loanbase.com', 'bitbay.net', 'coinomat.com', 'pocketdice.io', 'yabtcl.com', 'kraken.com', 'coinurl.com', 'clevercoin.com', 'bitzlato.com', 'antpool.com', 'genesis-mining.com', 'coinhako.com', 'coingi.com', 'bitso.com', 'bitkonan.com', 'cointrader.net', 'bter.com', 'bw.com', 'bitminter.com', 'bitzino.com', 'spectrocoin.com', 'vip72.com', 'cryptonit.net', 'doctordmarket', 'betmoose.com', 'virwox.com', 'btc38.com', 'betcoin.ag', 'coinpayments.net', 'bylls.com', 'primedice.com', 'jetwin.com', 'eligius.st', 'betcoin.tm', 'nucleusmarket', 'bleutrade.com', 'coinroll.com', 'quadrigacx.com', 'maicoin.com', 'coinmate.io', 'btcmarkets.net', 'unknown_large_w

In [None]:
# use existing addressesToCluster if exists, otherwise filter and pickle it first
if not os.path.exists('output\\addressesToCluster.pickle'):
    createAddressesToClusterList()

# load inside a with-block so the file handle is closed right after reading
with open('output\\addressesToCluster.pickle', 'rb') as pickleFile:
    addressesToCluster = pickle.load(pickleFile)
print(addressesToCluster)

['12sDU3FyYJXc2oRzE6XXuuhVHCBJvaoCC8', '1348ThkNoDupq1bws95diMiL8haGs61K7M', '13Pcmh4dKJE8Aqrhq4ZZwmM1sbKFcMQEEV', '15K9Zj1AU2hjT3ebZMtWqDsMv3fFxTNwpf', '164fawNZVwsR5SamAJypvCMtkMx4Xv1B3f', '179bzhS4FY7qLDza9YjuorhWyXVVYZu2YH', '17QAWGVpFV4gZ25NQug46e5mBho4uDP6MD', '17UUXDzPGkMwWrabhtk7YCha88tSoua2Vr', '19D1iGzDr7FyAdiy3ZZdxMd6ttHj1kj6WW', '19XVEDZCGVMA9WCF1qUayxtnjUnyD7zDDQ', '1BPf9qr7M5xUgNHUYtrQtEKvUKcyERzXao', '1C6hetVWVXZnS6P2BYBNu5Y1ZJ57JyXGac', '1DrhHEkv42JVwiDQNi28JFdSuiSGgPNXwP', '1EDcKCRypUTFoTZbxDWF9MBAT4W7XUGB32', '1EfmRn6Bp3cjrTBubaH8MzRRc2ikSjNGXw', '1EnX6BuJiGWydqXJT9BN5dSvfLg3QW4Mdz', '1EVTZmTMqZPMzGxsug9TXBtvPJZH8dXSCK', '1EYya5dfNvuYDwpeboGKBtkXzJcEHMCQXR', '1GALPyvUDDXqA6H2eHQ9Y1yidfQ6T1Drvn', '1GC2SjzCyCwxo1uxTi28oqn9L3mJj7bLPs', '1Gg25VzQkqCizXHNSNet4RoysLEe19su4s', '1JpSBaUwrZaEgmsYka7mzm9t3Z4syyaw7A', '1LhRW1msre1cFgT7fBY2BRrZ4ANMPwVj9u', '1Lm9BCDUKoBUk888DCXewM5p8bJyr83cEp', '1LPTaRfyoNwvwAtmYzcetZLjBfUxVkJrr4', '1MMaU5nTrFdPZotfwdbv1wWnFjLCTFbpPY', '1uLdz4wXrc