# 🚀 Solana: Skalert uthenting av blokker med adaptiv parallellisme

Denne notatboken henter blokker i batcher på 1000, og øker antall samtidige tilkoblinger (`limit`) med én per batch, fra 20 opp til maks 50.

✅ Inkluderer:
- Robust `asyncio` + `aiohttp`-pipeline
- Automatisk retries og logging
- Tidsmåling per batch
- Slutter hvis feilmelding oppstår

---


In [None]:
!pip install aiohttp pandas pyarrow tqdm nest_asyncio


In [None]:
import aiohttp
import asyncio
import nest_asyncio
import pandas as pd
from tqdm.notebook import tqdm
import time
import json

# Patch asyncio so an event loop that is already running can be re-entered
# (presumably needed because the notebook kernel runs its own loop — confirm).
nest_asyncio.apply()


In [None]:
# JSON-RPC endpoint that the block requests are POSTed to.
RPC_URL = "https://api.mainnet-beta.solana.com"
# HTTP headers sent with each request; the request body is JSON.
HEADERS = {"Content-Type": "application/json"}

def _parse_block_transactions(slot, result):
    """Flatten the ``result`` object of a ``getBlock`` response into row dicts."""
    block_time = result.get("blockTime")
    rows = []
    for tx in result.get("transactions") or []:
        meta = tx.get('meta')
        # Transactions without balance metadata carry nothing we can record.
        if not meta or not meta.get('postBalances'):
            continue

        accounts = tx['transaction']['message']['accountKeys']
        if not accounts:
            continue

        post_balances = meta['postBalances']
        pre_balances = meta.get('preBalances') or []
        # Balance change of the account at index 1; both lists must actually
        # contain that index, otherwise report 0.
        if len(post_balances) > 1 and len(pre_balances) > 1:
            lamports = post_balances[1] - pre_balances[1]
        else:
            lamports = 0

        rows.append({
            'slot': slot,
            'timestamp': block_time,
            'tx_signature': tx['transaction']['signatures'][0],
            'from_address': accounts[0],
            # The second account key is used as the recipient; this is a
            # heuristic, not a decoded transfer instruction.
            'to_address': accounts[1] if len(accounts) > 1 else None,
            'lamports': lamports
        })
    return rows


async def fetch_block(session, slot, retries=3):
    """Fetch one block with the ``getBlock`` JSON-RPC method and flatten it.

    Args:
        session: An open ``aiohttp.ClientSession`` used to POST the request.
        slot: Slot number of the block to fetch.
        retries: Maximum number of attempts. Values below 1 still result in
            a single attempt.

    Returns:
        A list with one dict per transaction that has balance metadata, with
        the keys ``slot``, ``timestamp``, ``tx_signature``, ``from_address``,
        ``to_address`` and ``lamports``. An empty list is returned when the
        slot holds no block or when every attempt failed; the function never
        raises for request or parsing errors and never returns ``None``.
    """
    # Error codes the Solana RPC server reports for slots that contain no
    # block (skipped slot / skipped in long-term storage). Retrying such a
    # request cannot succeed, so they are treated as "no data".
    skipped_slot_codes = (-32007, -32009)

    payload = {
        "jsonrpc": "2.0",
        "id": slot,
        "method": "getBlock",
        "params": [slot, {"maxSupportedTransactionVersion": 0}]
    }

    # Always try at least once so the loop cannot fall through and hand
    # ``None`` to a caller that extends a list with the result.
    attempts = max(1, retries)
    for attempt in range(attempts):
        try:
            async with session.post(RPC_URL, headers=HEADERS, data=json.dumps(payload)) as resp:
                if resp.status != 200:
                    raise Exception(f"Status {resp.status}")
                res = await resp.json()

            # The body is fully read, so the connection is already released
            # back to the pool while we inspect and parse the response.
            error = res.get("error")
            if error:
                if isinstance(error, dict) and error.get("code") in skipped_slot_codes:
                    return []
                # Any other RPC-level error (rate limit, unhealthy node, ...)
                # is a failed attempt and goes through the retry path instead
                # of being silently recorded as an empty block.
                raise Exception(f"RPC error {error}")

            result = res.get("result")
            if not result:
                return []
            return _parse_block_transactions(slot, result)
        except Exception as e:
            if attempt == attempts - 1:
                print(f"‚ùå Feil etter {attempts} fors√∏k p√• slot {slot}: {e}")
                return []
            await asyncio.sleep(1)  # wait briefly before the next attempt
    return []


In [None]:
async def main(start_slot=357230000, total_batches=10, batch_size=1000,
               limit=20, max_limit=50):
    """Fetch consecutive batches of blocks and store them as Parquet files.

    Each batch is fetched through its own ``aiohttp`` session whose
    connector allows ``limit`` simultaneous connections. After every batch
    that completes, ``limit`` is raised by one until it reaches
    ``max_limit``. Each non-empty batch is written to
    ``solana_batch_<n>.parquet``; at the end all rows are written to
    ``solana_all_batches.parquet``. The loop stops at the first batch that
    raises an exception.

    Args:
        start_slot: First slot of the first batch.
        total_batches: Number of batches to fetch.
        batch_size: Number of consecutive slots per batch (at least 1).
        limit: Initial number of simultaneous connections.
        max_limit: Upper bound for ``limit``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    all_results = []
    for batch_num in range(total_batches):
        first_slot = start_slot + batch_num * batch_size
        current_slots = range(first_slot, first_slot + batch_size)
        print(f"üì¶ Henter batch {batch_num+1} med limit={limit} for slots {current_slots[0]}‚Äì{current_slots[-1]}")

        # Monotonic clock: the measured duration cannot be skewed by
        # wall-clock adjustments.
        batch_start = time.perf_counter()
        connector = aiohttp.TCPConnector(limit=limit)
        try:
            async with aiohttp.ClientSession(connector=connector) as session:
                tasks = [fetch_block(session, slot) for slot in current_slots]
                batch_results = []
                for future in tqdm(asyncio.as_completed(tasks), total=len(tasks), desc=f"Batch {batch_num+1}"):
                    batch_results.extend(await future)

            rows_saved = 0
            # A batch without any rows yields a DataFrame without columns;
            # touching 'timestamp' would then raise KeyError and abort the
            # whole run, so only convert and write when there is data.
            if batch_results:
                df_batch = pd.DataFrame(batch_results)
                df_batch['timestamp'] = pd.to_datetime(df_batch['timestamp'], unit='s')
                df_batch.to_parquet(f"solana_batch_{batch_num+1}.parquet", index=False, compression='snappy')
                rows_saved = len(df_batch)
            print(f"‚úÖ Batch {batch_num+1} ferdig p√• {time.perf_counter() - batch_start:.2f} sekunder. Lagret {rows_saved} rader.")
            all_results.extend(batch_results)

            if limit < max_limit:
                limit += 1  # raise the connection limit cautiously

        except Exception as e:
            print(f"üõë Stoppet p√• batch {batch_num+1} med limit={limit}: {e}")
            break

    print(f"üéâ Ferdig. Totalt {len(all_results)} transaksjoner hentet.")
    # Nothing fetched: there is no 'timestamp' column to convert and no
    # data worth writing.
    if not all_results:
        return
    df_all = pd.DataFrame(all_results)
    df_all['timestamp'] = pd.to_datetime(df_all['timestamp'], unit='s')
    df_all.to_parquet("solana_all_batches.parquet", index=False, compression='snappy')

# Top-level await: this runs in a notebook cell; a plain Python script would
# need asyncio.run(main()) instead.
await main()
