## Notebook overview
This notebook runs a GraphQL query to fetch Arweave transaction (tx) header data for a random sample of blocks.

The query is executed against the GraphQL endpoint of the arweave.net gateway (`https://arweave.net/graphql`), currently operated by ar.io.

### to do
- add backoff to transport

In [118]:
# libraries
import pandas as pd
import numpy as np
from gql import Client, gql
from graphql import print_ast
from gql.transport.aiohttp import AIOHTTPTransport
import random
import asyncio
import os

In [None]:
# get working directory
# Current working directory of the running process; the CSV export cell
# writes its output file (weave_txs.csv) into this directory.
wd_path = os.getcwd()

In [58]:
# initial block params

# Block-height window to sample from. Both values are handed to
# random.randint() in the sampling loop, so both ends are inclusive.
min_block = 1_200_000
max_block = 1_391_438

# Number of random block draws made by the sampling loop.
block_sample_size = 10_000

In [59]:
# generic query
# Parsed GraphQL document reused for every request. Variables:
#   $cursor   - pagination cursor; results start after it (null = first page)
#   $minBlock - lowest block height to include
#   $maxBlock - highest block height to include
# Each page asks for 50 transactions (`first: 50`); per the original author's
# note the maximum page size is 100.
# NOTE(review): `bundledIn: null` presumably restricts results to transactions
# that are not part of a bundle - confirm against the gateway documentation.
# Fields returned per transaction: id, fee in AR, block timestamp and height,
# data size, plus the edge cursor and pageInfo.hasNextPage used for pagination.
query = gql('''
    query getTransactions($cursor: String, $minBlock: Int, $maxBlock: Int) {
        transactions(block: {min: $minBlock, max: $maxBlock}, bundledIn: null, first: 50, after: $cursor) {
            pageInfo {
                hasNextPage
            }
            edges {
                cursor
                node {
                    id
                    fee {
                        ar
                    }
                    block {
                        timestamp
                        height
                    }
                    data {
                        size
                    }
                }
            }
        }
    }
''')

In [62]:
async def fetch_data(next_cursor, query, random_block_i, block_i, transaction_data, i):
    """Fetch all transaction pages for one block and append them to ``transaction_data``.

    Pages are requested one after another over a single client session until
    the gateway reports no further page. (Pagination is done with a loop
    rather than recursion so that only one HTTP session is open at a time and
    the number of pages is not limited by the recursion depth.)

    Args:
        next_cursor: Cursor to start after; ``None`` requests the first page.
        query: Parsed GraphQL document accepting the ``cursor``, ``minBlock``
            and ``maxBlock`` variables.
        random_block_i: Block height to query; used as both ``minBlock`` and
            ``maxBlock`` so exactly one block is selected.
        block_i: Index of the current sample in the caller's loop (logging only).
        transaction_data: List that receives one dict per transaction. It is
            mutated in place.
        i: Starting value of the page counter (logging only).

    Returns:
        The same ``transaction_data`` list, extended with the rows fetched.
        If a request times out, a message is printed and the rows collected
        so far are returned.
    """
    # create transport
    # ar.io endpoint
    transport = AIOHTTPTransport(url="https://arweave.net/graphql")

    # Create a GraphQL client; one session is reused for every page of this block
    async with Client(transport=transport, fetch_schema_from_transport=False) as session:

        try:
            while True:
                # Execute the GraphQL query with cursor as a variable
                result = await session.execute(
                    query,
                    variable_values={
                        "cursor": next_cursor,
                        "minBlock": random_block_i,
                        "maxBlock": random_block_i,
                    },
                )

                transactions = result["transactions"]
                edges = transactions["edges"]
                has_next_page = transactions["pageInfo"]["hasNextPage"]

                # Process the result and append to master list
                for edge in edges:
                    node = edge["node"]
                    transaction_data.append({
                        "hash": node["id"],
                        "height": node["block"]["height"],
                        "timestamp": node["block"]["timestamp"],
                        "ar_fee": node["fee"]["ar"],
                        "data_size_bytes": node["data"]["size"],
                    })

                # capture the last record's cursor (unchanged when the page is empty)
                if edges:
                    next_cursor = edges[-1]["cursor"]

                # count completed page requests
                i += 1

                # print some logs
                print(f"loop count: {block_i}; block_i: {random_block_i}; attempt: {i}; objects returned: {len(edges)}; cursor: {next_cursor}; next page: {has_next_page}")

                # Stop when there is no further page. Also stop on an empty
                # page: the cursor cannot advance, so asking again would
                # repeat the same request forever.
                if not has_next_page or not edges:
                    break

        except asyncio.TimeoutError:
            print('Timeout error. Query results processed.')

    return transaction_data

# call main - get 1 random block for each call
async def appendList(next_cursor, query, random_block_i, block_i, transaction_data, i):
    """Await ``fetch_data`` with the given arguments and return its result unchanged."""
    block_rows = await fetch_data(
        next_cursor=next_cursor,
        query=query,
        random_block_i=random_block_i,
        block_i=block_i,
        transaction_data=transaction_data,
        i=i,
    )
    return block_rows

In [None]:
# Loop to sample blocks and get tx data

# create empty master list to append to
block_transactions_master = []

for block_i in range(block_sample_size):
    
    # get random block number in range
    random_block_i = random.randint(min_block,max_block)

    # loop cursor
    i = 0

    # empty list to hold data
    transaction_data = []

    # create next_cursor variable with default value None
    next_cursor = None

    # get the block txs
    block_transactions_master.append(await appendList(next_cursor, query, random_block_i, block_i, transaction_data, i))

print('Finished looping')

# flaten list
block_transactions_master = [item for sublist in block_transactions_master for item in sublist]

df_transactions = pd.DataFrame(block_transactions_master)

In [None]:
# export to csv
# Write the sampled transactions to weave_txs.csv in the working directory,
# leaving out the DataFrame index column. os.path.join builds the path with
# the platform's separator instead of a hard-coded '/'.
df_transactions.to_csv(os.path.join(wd_path, 'weave_txs.csv'), index=False)