In [1]:
import json
import os
import time

from hexbytes import HexBytes
from tqdm import tqdm
from web3 import Web3

# Download the most recent MAX_BLOCKS blocks (with full transaction objects)
# from the Gensyn testnet and write them to JSON files, one file per batch of
# `batch_size` blocks, named gensyn_blocks_<first>_<last>.json.

# Connect to Gensyn Testnet
# NOTE(review): the default URL below embeds an Alchemy API key in the source.
# Set GENSYN_RPC_URL in the environment to override it, and consider rotating
# the embedded key if this file is shared.
RPC_URL = os.environ.get(
    "GENSYN_RPC_URL",
    "https://gensyn-testnet.g.alchemy.com/v2/K2RfQySTi3Dtrcv7XuI7lE1FMJMX25vg",
)
web3 = Web3(Web3.HTTPProvider(RPC_URL))
print("✅ Connected:", web3.is_connected())


def _optional_int(value):
    """Return ``int(value)``, or ``None`` only when the field is absent.

    A legitimate zero (e.g. a 0-wei priority fee) is kept as ``0`` rather
    than being collapsed to ``None`` by a truthiness test.
    """
    return None if value is None else int(value)


# Settings
MAX_BLOCKS = 100000
batch_size = 10000

latest_block = web3.eth.block_number
start_block = max(0, latest_block - MAX_BLOCKS + 1)
end_block = latest_block

print(f"Fetching latest {MAX_BLOCKS} blocks: from {start_block} to {end_block} in batches of {batch_size}")
total_batches = ((end_block - start_block) // batch_size) + 1

# Block numbers whose fetch raised; reported at the end so gaps in the saved
# files are not silently lost in the progress-bar output.
failed_blocks = []

batch_starts = range(start_block, end_block + 1, batch_size)
for current_batch_num, batch_start in enumerate(batch_starts, start=1):
    batch_end = min(batch_start + batch_size - 1, end_block)
    print(f"\nProcessing batch {current_batch_num}/{total_batches}: blocks {batch_start} to {batch_end}")

    batch_data = []

    for block_num in tqdm(range(batch_start, batch_end + 1), desc="Blocks"):
        try:
            block = web3.eth.get_block(block_num, full_transactions=True)
        except Exception as e:
            # Best-effort: skip this block, but remember it for the summary.
            print(f"⚠️ Error fetching block {block_num}: {e}")
            failed_blocks.append(block_num)
            continue

        block_info = {
            "block_number": block.number,
            "timestamp": block.timestamp,
            "gas_used": block.gasUsed,
            "gas_limit": block.gasLimit,
            "base_fee_per_gas": int(block.get("baseFeePerGas", 0)),
            "miner": str(block.get("miner")),
            "size": block.get("size"),
            "tx_count": len(block.transactions),
            "transactions": []
        }

        for tx in block.transactions:
            tx_data = {
                "hash": tx.hash.hex(),
                "from": str(tx['from']),
                # `to` is empty for contract-creation transactions.
                "to": str(tx['to']) if tx['to'] else None,
                # Stored as a float in ether for convenience; the conversion
                # from integer wei to float may lose precision on large values.
                "value": float(web3.from_wei(tx['value'], 'ether')),
                "gas": tx['gas'],
                "gas_price": int(tx['gasPrice']),
                "input": tx['input'].hex() if isinstance(tx['input'], (bytes, HexBytes)) else tx['input'],
                "nonce": tx['nonce'],
                "transaction_type": tx.get('type', None),
                # None means "field not present"; 0 is a real value and is kept.
                "max_fee_per_gas": _optional_int(tx.get('maxFeePerGas')),
                "max_priority_fee_per_gas": _optional_int(tx.get('maxPriorityFeePerGas')),
            }
            block_info["transactions"].append(tx_data)

        batch_data.append(block_info)

    filename = f"gensyn_blocks_{batch_start}_{batch_end}.json"
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(batch_data, f, indent=2)

    print(f"Saved batch to {filename}")
    # Brief pause between batches.
    time.sleep(1)

print("\n✅ Finished fetching blocks.")
if failed_blocks:
    preview = ", ".join(str(n) for n in failed_blocks[:20])
    suffix = ", ..." if len(failed_blocks) > 20 else ""
    print(f"⚠️ {len(failed_blocks)} block(s) could not be fetched and are missing from the saved files: {preview}{suffix}")


✅ Connected: True
Fetching latest 100000 blocks: from 5058073 to 5158072 in batches of 10000

Processing batch 1/10: blocks 5058073 to 5068072


Blocks: 100%|██████████| 10000/10000 [10:22<00:00, 16.07it/s]


Saved batch to gensyn_blocks_5058073_5068072.json

Processing batch 2/10: blocks 5068073 to 5078072


Blocks: 100%|██████████| 10000/10000 [09:36<00:00, 17.34it/s]


Saved batch to gensyn_blocks_5068073_5078072.json

Processing batch 3/10: blocks 5078073 to 5088072


Blocks: 100%|██████████| 10000/10000 [09:29<00:00, 17.57it/s]


Saved batch to gensyn_blocks_5078073_5088072.json

Processing batch 4/10: blocks 5088073 to 5098072


Blocks: 100%|██████████| 10000/10000 [10:01<00:00, 16.61it/s]


Saved batch to gensyn_blocks_5088073_5098072.json

Processing batch 5/10: blocks 5098073 to 5108072


Blocks: 100%|██████████| 10000/10000 [09:51<00:00, 16.90it/s]


Saved batch to gensyn_blocks_5098073_5108072.json

Processing batch 6/10: blocks 5108073 to 5118072


Blocks: 100%|██████████| 10000/10000 [10:31<00:00, 15.83it/s] 


Saved batch to gensyn_blocks_5108073_5118072.json

Processing batch 7/10: blocks 5118073 to 5128072


Blocks: 100%|██████████| 10000/10000 [10:37<00:00, 15.69it/s]


Saved batch to gensyn_blocks_5118073_5128072.json

Processing batch 8/10: blocks 5128073 to 5138072


Blocks: 100%|██████████| 10000/10000 [09:52<00:00, 16.86it/s]


Saved batch to gensyn_blocks_5128073_5138072.json

Processing batch 9/10: blocks 5138073 to 5148072


Blocks: 100%|██████████| 10000/10000 [10:06<00:00, 16.50it/s] 


Saved batch to gensyn_blocks_5138073_5148072.json

Processing batch 10/10: blocks 5148073 to 5158072


Blocks: 100%|██████████| 10000/10000 [09:49<00:00, 16.97it/s]


Saved batch to gensyn_blocks_5148073_5158072.json

✅ Finished fetching blocks.


## Gensyn Testnet Data Extraction – AFYD Summary

### D — Do (What was done)

Extracted the 100,000 most recent blocks from the Gensyn testnet through Alchemy’s EVM-compatible JSON-RPC endpoint, using web3.py.

For each block, the following data was collected:

- **Block-level fields**:
  - Block number
  - Timestamp
  - Gas used
  - Gas limit
  - Base fee per gas
  - Block size
  - Miner address
  - Transaction count

- **Transaction-level fields**:
  - Transaction hash
  - Sender and receiver addresses
  - Value (converted from wei to ether)
  - Gas and gas price
  - Nonce
  - Input data
  - Transaction type
  - Max fee per gas
  - Max priority fee per gas

The data was saved as structured JSON files in batches of 10,000 blocks per file, totaling 10 batch files for 100,000 blocks.

---

### Y — Why (Reasoning)

The goal of this step was to prepare a clean and comprehensive dataset for downstream analysis of activity on the Gensyn testnet.

By extracting both high-level block metadata and detailed transaction-level fields, the dataset enables flexible analysis of contract interactions, address activity, and gas fee behavior.

EIP-1559 fee fields and smart contract indicators (e.g., input data) were included to support future exploration of gas dynamics and contract usage.

---

### F — Find (Initial Findings)

No specific analysis has been performed yet. At this stage, the dataset has been extracted and saved for further exploration.

---

### A — Answer (Conclusion)

The data extraction process successfully created a complete and extensible dataset containing block-level and transaction-level details from the Gensyn testnet.

This dataset will now serve as the foundation for subsequent analysis tasks such as identifying active users, smart contract patterns, gas fee trends, and block composition characteristics.


In [None]:
# Flatten the per-batch JSON files (gensyn_blocks_*.json) into one row per
# transaction, add date/hour/weekday columns, and export everything to CSV.
#
# `json` and `tqdm` come from the imports at the top of the file; `glob` and
# pandas are imported here because nothing else in the file imports them.
import glob

import pandas as pd

batch_files = sorted(glob.glob("gensyn_blocks_*.json"))
if not batch_files:
    # Fail with a clear message instead of a KeyError on an empty DataFrame.
    raise FileNotFoundError(
        "No gensyn_blocks_*.json files found in the current directory; "
        "run the block extraction step first."
    )

all_tx = []

for file in tqdm(batch_files):
    try:
        with open(file, "r", encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        # A corrupt or partially written batch file is skipped, not fatal.
        print(f"❌ Skipping file {file} due to JSONDecodeError: {e}")
        continue

    for block in data:
        for tx in block["transactions"]:
            # Each row repeats its block's metadata next to the transaction.
            all_tx.append({
                # Block-level info
                "block_number": block["block_number"],
                "timestamp": block["timestamp"],
                "gas_used_block": block["gas_used"],
                "gas_limit_block": block["gas_limit"],
                "base_fee_per_gas": block.get("base_fee_per_gas", None),
                "block_miner": block.get("miner", None),
                "block_size": block.get("size", None),
                "tx_count_in_block": block["tx_count"],

                # Transaction-level info
                "tx_hash": tx.get("hash"),
                "from": tx.get("from"),
                "to": tx.get("to"),
                "value_eth": float(tx.get("value", 0)),
                "gas": tx.get("gas", None),
                "gas_price": tx.get("gas_price", None),
                "input": tx.get("input", None),
                "nonce": tx.get("nonce", None),
                "transaction_type": tx.get("transaction_type", None),
                "max_fee_per_gas": tx.get("max_fee_per_gas", None),
                "max_priority_fee_per_gas": tx.get("max_priority_fee_per_gas", None)
            })

if not all_tx:
    # Without rows the DataFrame has no columns, so the timestamp handling
    # below would raise an opaque KeyError.
    raise ValueError(
        f"No transactions found in {len(batch_files)} batch file(s); nothing to export."
    )

# Create DataFrame
df = pd.DataFrame(all_tx)

# Convert timestamp (seconds since the Unix epoch) to datetime;
# unparseable values become NaT instead of raising.
df["timestamp"] = pd.to_datetime(df["timestamp"], unit="s", errors="coerce")

# Add extra fields for analysis
df["date"] = df["timestamp"].dt.date
df["hour"] = df["timestamp"].dt.hour
df["weekday"] = df["timestamp"].dt.day_name()

# Save to CSV
df.to_csv("gensyn_transactions_flat_full.csv", index=False)

# Preview
print(f"✅ Loaded {len(df):,} transactions with extended metadata.")
print(df.head())
