# INSTALL: packages

In [84]:
pip install nbformat

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\info\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


In [85]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\info\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


In [86]:
import pandas as pd
import os
import pickle
import WalletClustering_neo4jConnect

In [87]:
conn = WalletClustering_neo4jConnect.conn

# DEFINE: other methods

Were used in older versions of this code. Kept for consistency of other notebooks.

In [88]:
# flattens list of lists
def flatten(list):
    return [item for sublist in list for item in sublist]

# DEFINE: get interesting addresses excluding blacklisted addresses

## create output directory

In [89]:
try:
    os.mkdir('output')
except:
    pass

## query terror addresses

In [90]:
#The below code is returning a list of all the addresses that are marked as terror addresses.
#Query takes around 10 min
def returnTerrorAddresses():

    query_string = '''
    MATCH (a:Address {isTerror: True})
    Return a.address
    '''

    response = conn.query(query_string, db='neo4j')
    terrorAddresses = [r[0] for r in response]
    return terrorAddresses

In [91]:
# comment out entire block if old terrorAddressList saved as pickle should be used
def createTerrorAddressList():
    terrorAddressList = returnTerrorAddresses()
  
    # save terrorAddressList to file
    with open('output\\terrorAddressList.pickle', 'wb') as export:
        pickle.dump(terrorAddressList, export)

## create blacklist from csv

In [92]:
# allows blacklist of addresses in json format
def createBlacklistCSV():
    blacklistAddresses = pd.DataFrame(columns =['address', 'association'])

    dirname = os.path.dirname(os.path.realpath('__file__'))

    if os.path.isdir('..\\EntityAddressBitcoin\\'):        
        df = pd.read_csv(os.path.join(os.path.join(dirname, '..\\EntityAddressBitcoin\\'), 'Exchanges_full_detailed.csv'), usecols=['association', 'hashAdd'])[['hashAdd', 'association']]
        df.columns=['address', 'association']
        blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)

    #export blacklistAddresses to file
    with open('output\\blacklistAddresses.pickle', 'wb') as export:
        pickle.dump(blacklistAddresses, export)

## create blacklist from json

In [93]:
# allows blacklist of addresses in json format
def createBlacklist():
  
  # first create CSV blacklist, then append JSON
  createBlacklistCSV()
  
  if os.path.exists('output\\blacklistAddresses.pickle'):
    blacklistAddresses = pickle.load(open('output\\blacklistAddresses.pickle', 'rb'))
  else:
    blacklistAddresses = pd.DataFrame(columns =['address', 'association'])
  
  dirname = os.path.dirname(os.path.realpath('__file__'))
  
  # iterate over collection of exchange addresses in json format to create blacklist
  if os.path.isdir('..\\WalletExplorerScraper\\Output\\exchange\\'):
    outputScrapeExchangeFolder = os.path.join(dirname, '..\\WalletExplorerScraper\\Output\\exchange\\')
    for subfolder in os.listdir(outputScrapeExchangeFolder):
        print(subfolder)
        df = pd.read_json(os.path.join(outputScrapeExchangeFolder, subfolder, 'scraped_addresses.json'))
        df.columns=['address']
        df['association'] = subfolder.split("_",1)[1]
        blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
        continue
  
  if os.path.isdir('..\\WalletExplorerScraper\\Output\\mixer\\'):
    outputScrapeMixerFolder = os.path.join(dirname, '..\\WalletExplorerScraper\\Output\\mixer\\')
    for subfolder in os.listdir(outputScrapeMixerFolder):
        print(subfolder)
        df = pd.read_json(os.path.join(outputScrapeMixerFolder, subfolder, 'scraped_addresses.json'))
        df.columns=['address']
        df['association'] = subfolder.split("_",1)[1]
        blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
        continue

  if os.path.isdir('..\\WalletExplorerScraper\\Output\\giant_wallet\\'):
    outputScrapeGiantWalletFolder = os.path.join(dirname, '..\\WalletExplorerScraper\\Output\\giant_wallet\\')
    for subfolder in os.listdir(outputScrapeGiantWalletFolder):
        print(subfolder)
        df = pd.read_json(os.path.join(outputScrapeGiantWalletFolder, subfolder, 'scraped_addresses.json'))
        df.columns=['address']
        df['association'] = subfolder.split("_",1)[1]
        blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
        continue
      
  blacklistAddresses['association'] = blacklistAddresses['association'].str.lower()
  blacklistAddresses = blacklistAddresses.drop_duplicates('address', keep='last')

  #export blacklistAddresses to file
  with open('output\\blacklistAddresses.pickle', 'wb') as export:
    pickle.dump(blacklistAddresses, export)

## create list of blacklisted associations

In [94]:
def createAssocBlacklist():
    assocBlacklistDf = pickle.load(open('output\\blacklistAddresses.pickle', 'rb'))
    assocBlacklistDf = assocBlacklistDf.drop_duplicates('association', keep='first')
    assocBlacklist = assocBlacklistDf['association'].tolist()
    
    # otherServicesListDf = pickle.load(open('output/blacklistAddressesOSMIH.pickle', 'rb')) # list of other services addresses that have been clustered with MIH
    # otherServicesListDf = otherServicesListDf.drop_duplicates('otherServices', keep='last')
    # otherServicesList = otherServicesListDf['otherService'].tolist()
    # assocBlacklist.extend(otherServicesList)

    with open('output\\assocBlacklist.pickle', 'wb') as export:
        pickle.dump(list(set(assocBlacklist)), export)


## remove blacklisted addresses from addresses of interest

In [95]:
# to be used for removal of exchange addresses
def addressCleanUp(addressList, blacklist):
    interestingAddresses = []
    for address in addressList:
        if not blacklist['address'].str.contains(address).any():
            interestingAddresses.append(address)
    return interestingAddresses

In [96]:
def createAddressesToClusterList():
    addressesToCluster = addressCleanUp(terrorAddressList, blacklistAddresses)

    # save addressesToCluster to file
    with open('output\\addressesToCluster.pickle', 'wb') as export:
        pickle.dump(addressesToCluster, export)

# EXECUTE: get interesting addresses excluding blacklisted addresses

In [97]:
# use existing terrorAddressList if exists
if not os.path.exists('output\\terrorAddressList.pickle'):
    createTerrorAddressList()

terrorAddressList = pickle.load(open('output\\terrorAddressList.pickle', 'rb'))
print(terrorAddressList)

['12sDU3FyYJXc2oRzE6XXuuhVHCBJvaoCC8', '1348ThkNoDupq1bws95diMiL8haGs61K7M', '13iQsrwBYdrLpnitG5EV79o3PeHjH8XUBc', '13Pcmh4dKJE8Aqrhq4ZZwmM1sbKFcMQEEV', '15K9Zj1AU2hjT3ebZMtWqDsMv3fFxTNwpf', '15soXrE3NJBMkkQhrccXonTT9bpjpPvE67', '164fawNZVwsR5SamAJypvCMtkMx4Xv1B3f', '179bzhS4FY7qLDza9YjuorhWyXVVYZu2YH', '17QAWGVpFV4gZ25NQug46e5mBho4uDP6MD', '17UUXDzPGkMwWrabhtk7YCha88tSoua2Vr', '19D1iGzDr7FyAdiy3ZZdxMd6ttHj1kj6WW', '19XVEDZCGVMA9WCF1qUayxtnjUnyD7zDDQ', '1A7pDH1EdrkH9YZtsPnc8uzirBFnAN9Eay', '1BPf9qr7M5xUgNHUYtrQtEKvUKcyERzXao', '1C6hetVWVXZnS6P2BYBNu5Y1ZJ57JyXGac', '1DrhHEkv42JVwiDQNi28JFdSuiSGgPNXwP', '1EDcKCRypUTFoTZbxDWF9MBAT4W7XUGB32', '1EfmRn6Bp3cjrTBubaH8MzRRc2ikSjNGXw', '1EnX6BuJiGWydqXJT9BN5dSvfLg3QW4Mdz', '1EVTZmTMqZPMzGxsug9TXBtvPJZH8dXSCK', '1EYya5dfNvuYDwpeboGKBtkXzJcEHMCQXR', '1GALPyvUDDXqA6H2eHQ9Y1yidfQ6T1Drvn', '1GC2SjzCyCwxo1uxTi28oqn9L3mJj7bLPs', '1Gg25VzQkqCizXHNSNet4RoysLEe19su4s', '1JpSBaUwrZaEgmsYka7mzm9t3Z4syyaw7A', '1LhRW1msre1cFgT7fBY2BRrZ4ANMPwVj9u', '1Lm9BCDUKo

In [98]:
# use existing blacklist if exists
if not os.path.exists('output\\blacklistAddresses.pickle'):
    createBlacklist()

blacklistAddresses = pickle.load(open('output\\blacklistAddresses.pickle', 'rb'))
print(blacklistAddresses)

  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


0_huobi.com
100_banx.io


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


101_banx.io
102_clevercoin.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


103_gatecoin.com
104_gatecoin.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


105_indacoin.com
106_coinarch.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


107_bitcoinvietnam.com.vn
108_coinchimp.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


109_cryptonit.net
10_bitzlato.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


110_cryptonit.net
111_coingi.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


112_exchange-credit.ru
113_bitso.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


114_coinimal.com
115_empoex.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


116_ccedk.com
117_usecryptos.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


118_bitcoinp2p.com.br
119_coinbroker.io


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


11_localbitcoins.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


12_localbitcoins.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


13_bitstamp.net


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


14_bitstamp.net
15_mercadobitcoin.com.br


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


16_cryptsy.com
17_cryptsy.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


18_binance.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


19_binance.com
1_huobi.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


20_bitcoin.de


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


21_bitcoin.de
22_cex.io


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


23_btctrade.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


24_yobit.net


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


25_okcoin.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


26_okcoin.com
27_coinspot.com.au


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


28_btcc.com
29_btcc.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


2_bittrex.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


30_btcc.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


31_bx.in.th


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


32_hitbtc.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


33_maicoin.com
34_bter.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


35_bter.com
36_bter.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


37_bter.com
38_bter.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


39_hashnest.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


3_luno.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


40_anxpro.com
41_bitbay.net


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


42_bleutrade.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


43_bitfinex.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


44_bitfinex.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


45_bitfinex.com
46_matbea.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


47_coinhako.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


48_bit-x.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


49_coinmotion.com
4_poloniex.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


50_virwox.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


51_paxful.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


52_bitbargain.co.uk


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


53_spectrocoin.com
54_cavirtex.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


55_c-cex.com
56_c-cex.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


57_therocktrading.com
58_therocktrading.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


59_foxbit.com.br


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


5_kraken.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


60_foxbit.com.br


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


61_foxbit.com.br


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


62_foxbit.com.br


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


63_vircurex.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


64_bitvc.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


65_exmo.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


66_btc38.com
67_igot.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


68_blocktrades.us


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


69_simplecoin.cz


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


6_kraken.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


70_simplecoin.cz


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


71_simplecoin.cz


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


72_simplecoin.cz


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


73_simplecoin.cz


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


74_simplecoin.cz


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


75_fybsg.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


76_campbx.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


77_cointrader.net


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


78_bitcurex.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


79_coinmate.io


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


7_btc-e.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


80_korbit.co.kr


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


81_vaultoro.com
82_exchanging.ir


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


83_796.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


84_happycoins.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


85_btcmarkets.net


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


86_chbtc.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


87_coins-e.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


88_litebit.eu


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


89_coincafe.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


8_btc-e.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


90_urdubit.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


91_btradeaustralia.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


92_btradeaustralia.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


93_mexbt.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


94_coinomat.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


95_orderbook.net


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


96_lakebtc.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)
  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


97_bitkonan.com
98_quadrigacx.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


99_banx.io


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


9_btc-e.com


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


0_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


10_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


11_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


12_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


13_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


14_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


15_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


16_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


17_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


18_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


19_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


1_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


20_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


21_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


22_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


23_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


24_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


25_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


26_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


27_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


28_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


29_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


2_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


30_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


31_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


32_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


33_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


34_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


3_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


4_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


5_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


6_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


7_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


8_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


9_helix


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


0_unknown_large_wallet_01


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


1_unknown_large_wallet_02


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


2_unknown_large_wallet_03


  blacklistAddresses = blacklistAddresses.append(df, ignore_index=True)


                                     address              association
1         1CrhRvGShwmQkrGcqFzDDwDDBiEhKpHAqF               anxpro.com
2         1AbH5pFqocUYVhRqPAtJruVr5DMAxEANmn               anxpro.com
4         1FM8vDXinj2N5LoTXX5J4MSGTCn2pPkqRm               anxpro.com
7         18g2RSp4MxzkyNkigpYDejP2jMgkJ2Po9A               anxpro.com
18        195rycdSmRf1tQDztryRkZWKaFgFwM1PPG              bittrex.com
...                                      ...                      ...
21606795  1JiTqsNvfjkGSYVHMKHpWbNwEfN2LUhjKM  unknown_large_wallet_03
21606796  1HoNkBpNPkcznGWe6nUeLqffeLQN1TwhFJ  unknown_large_wallet_03
21606797  1BmvAcQPLcFve9KpNigfF591PyQnZwNDyQ  unknown_large_wallet_03
21606798  1GNw9S6hSzVQ83gUkP36ypjyrH3d6YZRm7  unknown_large_wallet_03
21606799  1McAmDN95SGxaTMmymaN8QzzTGJADM3aZy  unknown_large_wallet_03

[13983352 rows x 2 columns]


In [99]:
if not os.path.exists('output\\assocBlacklist.pickle'):
    createAssocBlacklist()

assocBlacklist = pickle.load(open('output\\assocBlacklist.pickle', 'rb'))
print(assocBlacklist)

['bitfinex.com', 'matbea.com', 'unknown_large_wallet_01', 'luno.com', 'coinspot.com.au', 'orderbook.net', 'bitvc.com', 'empoex.com', 'bitzlato.com', 'huobi.com', 'foxbit.com.br', 'vircurex.com', 'kraken.com', 'bitcoinp2p.com.br', 'fybsg.com', 'hitbtc.com', 'therocktrading.com', 'coincafe.com', 'btc-e.com', 'urdubit.com', 'exchangemycoins.com', 'indacoin.com', 'paxful.com', 'btcmarkets.net', 'virwox.com', 'bitso.com', '1coin.com', 'exmo.com', 'coinmotion.com', 'hashnest.com', 'coinhako.com', 'binance.com', 'clevercoin.com', 'banx.io', 'coins-e.com', 'cointrader.net', 'bit-x.com', 'unknown_large_wallet_03', 'btcc.com', 'bitbay.net', 'bx.in.th', 'coinmate.io', 'c-cex.com', 'btctrade.com', 'cryptsy.com', 'happycoins.com', 'exchange-credit.ru', 'igot.com', '796.com', 'anxpro.com', 'ccedk.com', 'okcoin.com', 'btradeaustralia.com', 'bitstamp.net', 'litebit.eu', 'gatecoin.com', 'quadrigacx.com', 'bitcurex.com', 'mercadobitcoin.com.br', 'coinimal.com', 'korbit.co.kr', 'coinchimp.com', 'coinbrok

In [100]:
# use existing addressesToCluster if exists
if not os.path.exists('output\\addressesToCluster.pickle'):
    createAddressesToClusterList()

addressesToCluster = pickle.load(open('output\\addressesToCluster.pickle', 'rb'))
print(addressesToCluster)

['12sDU3FyYJXc2oRzE6XXuuhVHCBJvaoCC8', '1348ThkNoDupq1bws95diMiL8haGs61K7M', '13Pcmh4dKJE8Aqrhq4ZZwmM1sbKFcMQEEV', '15K9Zj1AU2hjT3ebZMtWqDsMv3fFxTNwpf', '164fawNZVwsR5SamAJypvCMtkMx4Xv1B3f', '179bzhS4FY7qLDza9YjuorhWyXVVYZu2YH', '17QAWGVpFV4gZ25NQug46e5mBho4uDP6MD', '17UUXDzPGkMwWrabhtk7YCha88tSoua2Vr', '19D1iGzDr7FyAdiy3ZZdxMd6ttHj1kj6WW', '19XVEDZCGVMA9WCF1qUayxtnjUnyD7zDDQ', '1BPf9qr7M5xUgNHUYtrQtEKvUKcyERzXao', '1C6hetVWVXZnS6P2BYBNu5Y1ZJ57JyXGac', '1DrhHEkv42JVwiDQNi28JFdSuiSGgPNXwP', '1EDcKCRypUTFoTZbxDWF9MBAT4W7XUGB32', '1EfmRn6Bp3cjrTBubaH8MzRRc2ikSjNGXw', '1EnX6BuJiGWydqXJT9BN5dSvfLg3QW4Mdz', '1EVTZmTMqZPMzGxsug9TXBtvPJZH8dXSCK', '1EYya5dfNvuYDwpeboGKBtkXzJcEHMCQXR', '1GALPyvUDDXqA6H2eHQ9Y1yidfQ6T1Drvn', '1GC2SjzCyCwxo1uxTi28oqn9L3mJj7bLPs', '1Gg25VzQkqCizXHNSNet4RoysLEe19su4s', '1JpSBaUwrZaEgmsYka7mzm9t3Z4syyaw7A', '1LhRW1msre1cFgT7fBY2BRrZ4ANMPwVj9u', '1Lm9BCDUKoBUk888DCXewM5p8bJyr83cEp', '1LPTaRfyoNwvwAtmYzcetZLjBfUxVkJrr4', '1MMaU5nTrFdPZotfwdbv1wWnFjLCTFbpPY', '1uLdz4wXrc