In [None]:
# %% -------------------------- Setup & Constants --------------------------------
from __future__ import annotations
import json, os, time, requests
from pathlib import Path
from typing import List, Set

# --- What to collect and where to write it ---
TX_TARGET = 10_000                                   # target number of transactions
OUTPUT_CSV = Path("latest10k_wallets.csv")           # output file (newly added setting)

# --- HTTP behaviour ---
PROXY_URL = os.environ.get("ROTATING_PROXY_URL")     # optional; None when the variable is unset
REQUEST_TIMEOUT = 30                                 # per-request timeout, in seconds
MAX_RETRIES = 3                                      # retries after the first attempt
BACKOFF_FACTOR = 1.5                                 # retry delay is BACKOFF_FACTOR ** attempt
RATE_LIMIT_DELAY = 1.0                               # minimum wait between requests (s)

In [None]:
# %% -------------------------- HTTP helper with retry ------------------------
def get_retry(url: str, session: requests.Session) -> dict | None:
    """GET *url* through *session* and return the decoded JSON body.

    The request is attempted up to ``MAX_RETRIES + 1`` times. HTTP statuses
    429/500/502/503/504, ``requests`` exceptions, and undecodable JSON bodies
    are treated as transient: the function waits ``BACKOFF_FACTOR ** attempt``
    seconds and tries again. Any other non-200 status fails immediately.

    Args:
        url: Address to fetch.
        session: Session used to issue the request (carries proxy settings).

    Returns:
        The parsed JSON payload on a 200 response, or ``None`` when the
        request failed permanently or every attempt was exhausted.
    """
    for attempt in range(MAX_RETRIES + 1):
        # On the final attempt there is nothing left to wait for, so we
        # report the failure and return instead of sleeping uselessly.
        is_last = attempt == MAX_RETRIES
        try:
            r = session.get(url, timeout=REQUEST_TIMEOUT)
            if r.status_code == 200:
                return r.json()
            if r.status_code not in (429, 500, 502, 503, 504) or is_last:
                print(f" {url} -> {r.status_code}")
                return None
            delay = BACKOFF_FACTOR ** attempt
            print(f" {url} -> {r.status_code}, retry fra {delay:.1f}s")
        except (requests.RequestException, ValueError) as e:
            # ValueError covers JSON decoding failures on requests releases
            # whose JSONDecodeError is not a RequestException subclass.
            if is_last:
                print(f" {type(e).__name__}: {e}")
                return None
            delay = BACKOFF_FACTOR ** attempt
            print(f" {type(e).__name__}: {e} - retry fra {delay:.1f}s")
        time.sleep(delay)
    return None

In [None]:
# %% -------------------------- Fetch Transactions ----------------------------
def fetch_last_transactions(tx_target: int = TX_TARGET) -> List[dict]:
    """Walk backwards through blocks until at least *tx_target* transactions are collected.

    Starts at the block returned by the ``latestblock`` endpoint and follows
    each block's ``prev_block`` hash, appending that block's ``tx`` entries.
    Requests go through ``PROXY_URL`` when it is set.

    Args:
        tx_target: Number of transactions to collect.

    Returns:
        The collected transactions, newest block first, truncated to
        *tx_target* entries. The list is shorter than *tx_target* if a block
        could not be fetched before the target was reached.

    Raises:
        RuntimeError: If the latest block cannot be fetched.
    """
    proxy = {"http": PROXY_URL, "https": PROXY_URL} if PROXY_URL else None
    tx_list: List[dict] = []

    with requests.Session() as s:
        if proxy:
            s.proxies.update(proxy)

        latest = get_retry("https://blockchain.info/latestblock", s)
        if not latest:
            raise RuntimeError("Unable to fetch latest block")

        block_hash = latest["hash"]
        height = latest["height"]
        print(f"Starting from block {height} ({block_hash[:12]}...)")

        while len(tx_list) < tx_target:
            block = get_retry(f"https://blockchain.info/rawblock/{block_hash}", s)
            if not block:
                # Keep what was gathered so far rather than discarding it.
                print("Block skipped due to error, exiting.")
                break

            tx_list.extend(block["tx"])
            print(f" Block {block['height']} - added {block['n_tx']} tx "
                  f"(tot {len(tx_list):,}/{tx_target:,})")
            if len(tx_list) >= tx_target:
                # Target reached: no further request follows, so there is
                # nothing to throttle and no reason to sleep.
                break
            block_hash = block["prev_block"]
            time.sleep(RATE_LIMIT_DELAY)            # respect rate-limit

    return tx_list[:tx_target]                      # truncate to exact target

In [None]:
# %% -------------------------- Extract Wallets --------------------------------
def extract_addresses(txs: List[dict]) -> Set[str]:
    """Collect every non-empty ``addr`` found in the outputs and inputs of *txs*.

    Output addresses are read from ``tx["out"][i]["addr"]`` and input
    addresses from ``tx["inputs"][i]["prev_out"]["addr"]``. Missing keys,
    explicit ``None`` values, and empty addresses are skipped.

    Args:
        txs: Transaction dictionaries to scan.

    Returns:
        The set of distinct address strings found.
    """
    addrs: Set[str] = set()
    for tx in txs:
        # `or ()` / `or {}` rather than a .get() default: a default only
        # applies when the key is absent, not when its value is an explicit
        # null (presumably possible for coinbase inputs - TODO confirm).
        for output in tx.get("out") or ():
            addr = output.get("addr")
            if addr:
                addrs.add(addr)
        for tx_input in tx.get("inputs") or ():
            prev_out = tx_input.get("prev_out") or {}
            addr = prev_out.get("addr")
            if addr:
                addrs.add(addr)
    return addrs

In [None]:
# %% -------------------------- Main Execution -----------------------------------
if __name__ == "__main__":
    import csv

    # Download the transactions, then reduce them to a sorted list of
    # distinct addresses.
    txs = fetch_last_transactions()
    wallets = sorted(extract_addresses(txs))
    print(f"\n Found {len(wallets):,} unique wallets from {len(txs):,} transactions")

    # Single-column CSV: a header row followed by one address per row.
    with OUTPUT_CSV.open("w", newline="") as csv_file:
        out = csv.writer(csv_file)
        out.writerow(["address"])
        out.writerows([wallet] for wallet in wallets)
    print(f" CSV saved to {OUTPUT_CSV.resolve()}")