<a href="https://colab.research.google.com/github/madztheo/sorting-hat-ai/blob/main/fetch_wallet_data_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import asyncio
import requests
import csv
from google.colab import drive

# Set constants
# API_KEY is interpolated into every Etherscan URL as the `apikey` query
# parameter. It is blank here; fill in your own key before running.
API_KEY = ""
# Sent as the `offset` query parameter (page 1 only) of each Etherscan request,
# so it bounds how many records a single call can return.
MAX_TRANSACTIONS = 9999
# Passed as the `max` argument of extract_neighbours() when processing the
# wallets found at the first level of the graph.
MAX_LVL_2_NEIGHBOURS = 99

# Mount Google Drive
# The resulting CSV is written below this mount point, so the mount must
# succeed before the main cell runs.
drive.mount('/content/gdrive')

#Semaphore to limit simultaneous queries (API limit)
# fetch_transactions() holds one slot per request, so at most 5 requests are
# in flight at any time.
sem = asyncio.BoundedSemaphore(5)

async def fetch_transactions(url):
    """
    Fetches the transaction list served by a single Etherscan API URL.

    The blocking ``requests.get`` call runs in the default thread-pool
    executor so it does not stall the event loop. The module-level semaphore
    ``sem`` bounds the number of simultaneous requests, and each slot is held
    for one extra second after the response arrives to throttle the request
    rate.

    Args:
        url: Fully built Etherscan request URL (including the API key).

    Returns:
        The list found under the ``"result"`` key of the JSON payload, or an
        empty list when the payload is not valid JSON, is not a JSON object,
        or carries something other than a list under ``"result"`` (the API
        reports errors such as rate limiting as a plain string there).

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    async with sem:
        loop = asyncio.get_running_loop()
        # A timeout prevents one stalled connection from holding a semaphore
        # slot (and therefore the whole crawl) forever.
        response = await loop.run_in_executor(
            None, lambda: requests.get(url, timeout=30)
        )
        # Keep the slot busy a little longer to stay under the API rate limit.
        await asyncio.sleep(1)

    try:
        payload = response.json()
    except ValueError:
        # Non-JSON body (e.g. an HTML error page): treat as "no data" rather
        # than aborting a crawl that may be hundreds of requests in.
        print(f"Warning: non-JSON response (HTTP {response.status_code}), ignoring")
        return []

    result = payload.get("result", []) if isinstance(payload, dict) else None
    if not isinstance(result, list):
        # The URL is deliberately not printed: it contains the API key.
        print(f"Warning: no transaction list in response: {result!r}")
        return []
    return result

async def fetch_all_transactions(wallet_address):
    """
    Fetches every kind of transaction for a wallet address concurrently.

    Four Etherscan ``account`` actions are queried in parallel: normal
    transactions (``txlist``), ERC-20 transfers (``tokentx``), ERC-721
    transfers (``tokennfttx``) and ERC-1155 transfers (``token1155tx``).
    Only the first page is requested, with ``MAX_TRANSACTIONS`` records per
    page, sorted ascending.

    Args:
        wallet_address: The address whose transactions are requested.

    Returns:
        A single flat list with the records of all four queries, in the order
        listed above. Sub-results that are not lists (for example an error
        string returned by the API) are dropped instead of being flattened
        into individual characters.
    """
    base_url = "https://api.etherscan.io/api?module=account"
    paging = f"&page=1&offset={MAX_TRANSACTIONS}&sort=asc&apikey={API_KEY}"
    # Action name -> extra query parameters specific to that action.
    actions = {
        "txlist": "&startblock=0&endblock=99999999",
        "tokentx": "",
        "tokennfttx": "",
        "token1155tx": "",
    }
    urls = [
        f"{base_url}&action={action}&address={wallet_address}{extra}{paging}"
        for action, extra in actions.items()
    ]

    # Fetch the transactions concurrently (fetch_transactions throttles itself).
    results = await asyncio.gather(*(fetch_transactions(url) for url in urls))

    # Flatten the results, keeping only genuine lists of records.
    ret = [
        transaction
        for sublist in results
        if isinstance(sublist, list)
        for transaction in sublist
    ]
    # Show the first record as a quick sanity check of what was fetched.
    print(ret[:1])
    return ret

def extract_neighbours(wallet_address, transactions, max=0):
    """
    Extracts the neighbours of a given wallet address from a list of transactions.

    A neighbour is the counterparty of a transaction: the sender when the
    wallet is the recipient, otherwise the recipient. Addresses are compared
    case-insensitively, because wallets are often written in checksummed
    mixed case while the transaction records use lowercase; a case-sensitive
    comparison would report the wallet itself as its own neighbour and lose
    the real counterparty.

    Transactions are skipped when either side starts with ``0x0000000000``
    (null/burn-style addresses), when the counterparty is empty, or when the
    counterparty is the wallet itself.

    Args:
        wallet_address: Address of the wallet being analysed (any letter case).
        transactions: Iterable of transaction dicts with ``'from'`` and
            ``'to'`` keys. Entries that are not dicts are ignored; missing or
            ``None`` addresses are treated as empty.
        max: Maximum number of unique neighbours to collect. ``0`` (the
            default) means no limit. The name shadows the builtin but is kept
            for backward compatibility with existing callers.

    Returns:
        A list of unique, lowercased neighbour addresses in order of first
        appearance.
    """
    print(f"Extracting neighbours for {wallet_address}")
    zero_prefix = '0x0000000000'
    wallet = wallet_address.lower()
    # A dict is used as an insertion-ordered set so the result is deterministic.
    neighbours = {}
    for tx in transactions or []:
        if not isinstance(tx, dict):
            continue
        sender = (tx.get('from') or '').lower()
        recipient = (tx.get('to') or '').lower()
        if sender.startswith(zero_prefix) or recipient.startswith(zero_prefix):
            continue
        neighbour = sender if recipient == wallet else recipient
        if not neighbour or neighbour == wallet:
            continue
        neighbours[neighbour] = None
        # With max == 0 this never triggers, i.e. there is no limit.
        if len(neighbours) == max:
            break
    return list(neighbours)

async def generate_graph(wallet_address, max_first_neighbours=99):
    """
    Builds a two-level neighbourhood graph around a wallet address.

    The wallet's own transactions are fetched first to find its direct
    neighbours. Then, for up to ``max_first_neighbours`` of them, their
    transactions are fetched as well to find second-level neighbours (capped
    at ``MAX_LVL_2_NEIGHBOURS`` each). Remaining direct neighbours are still
    recorded, with an empty neighbour list.

    Args:
        wallet_address: Address of the root wallet (any letter case).
        max_first_neighbours: How many direct neighbours are expanded with
            their own API queries. Defaults to 99.

    Returns:
        A dict mapping each wallet address (the root and every direct
        neighbour) to the list of its neighbours. The root key is the
        lowercased form of ``wallet_address``.
    """
    # The API reports addresses in lowercase; normalising the root keeps it
    # comparable with the addresses found in the transaction records, so it
    # is not mistaken for a different node.
    wallet_address = wallet_address.lower()

    transactions = await fetch_all_transactions(wallet_address)
    first_neighbours = extract_neighbours(wallet_address, transactions)
    neighbours_by_wallet = {wallet_address: first_neighbours}

    total = len(first_neighbours)
    for i, wallet in enumerate(first_neighbours, start=1):
        progress = f'{i}/{total} (max {max_first_neighbours} fetched)'
        if i <= max_first_neighbours:
            print(progress)
            transactions = await fetch_all_transactions(wallet)
            neighbours_by_wallet[wallet] = extract_neighbours(
                wallet, transactions, MAX_LVL_2_NEIGHBOURS
            )
        else:
            # Past the limit: only update the progress line in place and
            # record the wallet as a leaf node.
            print(progress, end='\x1b[1K\r')
            neighbours_by_wallet[wallet] = []
    print('')
    return neighbours_by_wallet

Mounted at /content/gdrive


In [None]:
# Main execution
some_addresses = {
    "vitalik": "0xab5801a7d398351b8be11c439e05c5b3259aec9b", #~5k neighbours
    "mcuban": "0xa679c6154b8d4619af9f83f0bf9a13a680e01ecf", #~2k neighbours
    "beeple": "0xc6b0562605D35eE710138402B878ffe6F2E23807", #~400 neighbours
    "titanmoon": "0x850920d220086aEA133c7EE7C7c98906a71ce45E", #14 neighbours
    "nakiri": "0x73d30ba3dc4fFD17C28cc2D75D12e50dF98f29CF", #3k neighbours
    "steve aoki": "0xe4bBCbFf51e61D0D95FcC5016609aC8354B177C4", #~400 neighbours
    "deroidz": "0x4B05FeE30ba471941Ed1fD6Fb3f68BCdFECfc036", #93 neighbours
    "bighairymonster": "0x8287B1662a42647c24ACe9fD3d6381D4446a2C79", #10 neighbours
}
wallet_name = "bighairymonster"
wallet_address = some_addresses[wallet_name]
csv_file = f'/content/gdrive/My Drive/Colab Notebooks/wallet_partial_data_{wallet_name}.csv'

# Run the whole process
graph_as_dic = await generate_graph(wallet_address)

# Write dic to CSV file for Gephi import
unique_tuples = []
i = 0
for key, value in graph_as_dic.items():
  i+=1
  print(f'{i}/{len(graph_as_dic)} post-processed', end='\x1b[1K\r')
  for w in value:
    if not ((key, w) in unique_tuples or (w, key) in unique_tuples):
      unique_tuples.append((key, w))
print()

with open(csv_file, mode="w", newline="") as file:
  writer = csv.writer(file)
  writer.writerow(["Source", "Target"])
  for (a, b) in unique_tuples:
    writer.writerow([a, b])


[{'blockNumber': '15605140', 'timeStamp': '1664045711', 'hash': '0xf323ac9a238d2baa80f468e6394d3088a2e0c316bf587a7f4debbb13ca361980', 'nonce': '1779061', 'blockHash': '0xc800604eee9fbbd5c2903f651a2633c2280a9a8826c73fafc697209fb800e3f0', 'transactionIndex': '59', 'from': '0x503828976d22510aad0201ac7ec88293211d23da', 'to': '0x8287b1662a42647c24ace9fd3d6381d4446a2c79', 'value': '111108950000000000', 'gas': '21000', 'gasPrice': '6723064697', 'isError': '0', 'txreceipt_status': '1', 'input': '0x', 'contractAddress': '', 'cumulativeGasUsed': '5010909', 'gasUsed': '21000', 'confirmations': '1618345', 'methodId': '0x', 'functionName': ''}]
Extracting neighbours for 0x8287B1662a42647c24ACe9fD3d6381D4446a2C79
1/8 (max 99 fetched)
[{'blockNumber': '15876394', 'timeStamp': '1667320391', 'hash': '0x869488d4b11ac6c93054d217c8941a183d7a5e8642fe7b6f5f5a8c1687052321', 'nonce': '137', 'blockHash': '0xc52cb780b90b99121c700cd9e96b3ac3094ba02af2cc90e29ce39b8cd6154716', 'transactionIndex': '125', 'from': '0