# Download Hyperliquid Data from S3

Utilities for downloading data from Hyperliquid's S3 buckets.

In [None]:
import subprocess
import json
import msgpack
import lz4.frame
from pathlib import Path
from datetime import datetime

# Named AWS CLI profile; the S3 helpers in this notebook export it as the
# AWS_PROFILE environment variable for every `aws s3` command they run.
AWS_PROFILE = 'trevor'
# Root folder for downloaded samples. The download helpers create one
# sub-folder per dataset beneath it (e.g. 'explorer_blocks', 'node_fills').
# The path is relative, so it resolves against the current working directory.
OUTPUT_DIR = Path('../hyperliquid_samples')

## S3 Bucket Structure

```
s3://hl-mainnet-node-data/
├── explorer_blocks/          # Raw blocks (Feb 2023+)
├── node_trades/hourly/       # Parsed trades (Mar 2025+)
├── node_fills/hourly/        # Fills + PnL (May 2025+)
└── replica_cmds/             # Raw L1 commands (Jan 2025+)

s3://hyperliquid-archive/
├── market_data/              # L2 orderbook (Apr 2023+)
└── asset_ctxs/               # Asset contexts
```

In [None]:
def s3_ls(path):
    """List the contents of an S3 path with the AWS CLI.

    Runs ``aws s3 ls <path> --request-payer requester`` with ``AWS_PROFILE``
    set in the child process environment.

    Args:
        path: S3 URI to list, e.g. ``'s3://bucket/prefix/'``.

    Returns:
        list[str]: The non-empty lines the command wrote to stdout, in
        order. An empty list is returned when the command prints nothing
        (for example an empty prefix or a failed call) or when the ``aws``
        executable cannot be found.
    """
    import os  # local import so this helper does not depend on other cells

    # Use an argument list instead of a shell string: quotes, spaces or
    # shell metacharacters in `path` can then never alter the command.
    cmd = ['aws', 's3', 'ls', str(path), '--request-payer', 'requester']
    env = {**os.environ, 'AWS_PROFILE': AWS_PROFILE}
    try:
        result = subprocess.run(cmd, env=env, capture_output=True, text=True)
    except FileNotFoundError:
        # Without a shell, a missing `aws` binary raises instead of
        # producing a non-zero exit code; keep the non-raising contract.
        return []
    # ''.split('\n') yields [''], which callers would count as one entry,
    # so blank lines are dropped.
    return [line for line in result.stdout.strip().split('\n') if line]

def s3_cp(src, dst):
    """Copy a single object from S3 with the AWS CLI.

    Runs ``aws s3 cp <src> <dst> --request-payer requester`` with
    ``AWS_PROFILE`` set in the child process environment.

    Args:
        src: Source location (typically an ``s3://`` URI).
        dst: Destination location (typically a local file path).

    Returns:
        bool: True when the command exits with status 0, False otherwise
        (including when the ``aws`` executable cannot be found). On failure
        the CLI's stderr output, if any, is printed.
    """
    import os  # local import so this helper does not depend on other cells

    # Use an argument list instead of a shell string: quotes, spaces or
    # shell metacharacters in `src`/`dst` can then never alter the command.
    cmd = ['aws', 's3', 'cp', str(src), str(dst), '--request-payer', 'requester']
    env = {**os.environ, 'AWS_PROFILE': AWS_PROFILE}
    try:
        result = subprocess.run(cmd, env=env, capture_output=True, text=True)
    except FileNotFoundError:
        # Without a shell, a missing `aws` binary raises instead of
        # producing a non-zero exit code; keep the boolean contract.
        print("aws CLI not found on PATH")
        return False

    succeeded = result.returncode == 0
    if not succeeded and result.stderr.strip():
        # Surface the reason for the failure instead of discarding it.
        print(result.stderr.strip())
    return succeeded

## List Available Data

In [None]:
# Show what sits at the root of the node-data bucket
print("hl-mainnet-node-data contents:")
root_entries = s3_ls('s3://hl-mainnet-node-data/')
for entry in root_entries:
    print(f"  {entry}")

In [None]:
# Show the block-range prefixes found under explorer_blocks/
print("explorer_blocks prefixes:")
block_range_entries = s3_ls('s3://hl-mainnet-node-data/explorer_blocks/')
for entry in block_range_entries:
    print(f"  {entry}")

In [None]:
# Peek at the first five entries listed under node_fills/hourly/
print("node_fills earliest dates:")
hourly_entries = s3_ls('s3://hl-mainnet-node-data/node_fills/hourly/')
for entry in hourly_entries[:5]:
    print(f"  {entry}")

## Download Explorer Block

In [None]:
def download_explorer_block(block_num, output_dir=None):
    """
    Fetch the explorer_blocks archive that covers ``block_num``.

    Archives are batched by 100 blocks and named after their ending block.
    The name is computed as the next multiple of 100 strictly above
    ``block_num`` (so 500 maps to ``600.rmp.lz4``).

    Args:
        block_num: Block number to look up.
        output_dir: Destination folder; when falsy, defaults to
            ``OUTPUT_DIR / 'explorer_blocks'``. Created if missing.

    Returns:
        Path of the downloaded file, or None when the copy fails.
    """
    if output_dir:
        dest_dir = Path(output_dir)
    else:
        dest_dir = Path(OUTPUT_DIR / 'explorer_blocks')
    dest_dir.mkdir(parents=True, exist_ok=True)

    # Name of the batch file: ending block of the 100-block batch
    batch_end = (block_num // 100 + 1) * 100
    # The two directory levels are the batch end floored to 100M and to 100k
    top_level = 100_000_000 * (batch_end // 100_000_000)
    sub_level = 100_000 * (batch_end // 100_000)

    filename = f'{batch_end}.rmp.lz4'
    remote = f's3://hl-mainnet-node-data/explorer_blocks/{top_level}/{sub_level}/{filename}'
    target = dest_dir / filename

    print(f"Downloading {remote}...")
    if not s3_cp(remote, str(target)):
        print("Download failed")
        return None

    print(f"Saved to {target}")
    return target

In [None]:
# Example: Download blocks around block 500,000,000
# download_explorer_block(500_000_000)

## Download Node Fills

In [None]:
def download_node_fills(date_str, hour=None, output_dir=None):
    """
    List node_fills objects for a date (YYYYMMDD) and optionally one hour.

    NOTE: despite its name, this function currently only lists the S3
    prefix and prints a short preview; it does not copy any files. The
    output directory is created but nothing is written to it.

    Args:
        date_str: Date in ``YYYYMMDD`` form, e.g. ``'20251101'``.
        hour: Optional hour of day. May be an int (``5``) or a numeric
            string (``'05'``); it is zero-padded to two digits in the path.
        output_dir: Destination folder; when falsy, defaults to
            ``OUTPUT_DIR / 'node_fills'``. Created if missing.

    Returns:
        list[str]: The non-blank listing lines for the prefix (empty when
        nothing is found).

    Raises:
        ValueError: If ``hour`` cannot be converted to an integer.
    """
    output_dir = Path(output_dir or OUTPUT_DIR / 'node_fills')
    output_dir.mkdir(parents=True, exist_ok=True)

    if hour is not None:
        # int() lets callers pass '05' as well as 5; formatting a str with
        # ':02d' would raise ValueError.
        s3_path = f's3://hl-mainnet-node-data/node_fills/hourly/{date_str}/{int(hour):02d}/'
    else:
        s3_path = f's3://hl-mainnet-node-data/node_fills/hourly/{date_str}/'

    # Drop blank lines so an empty listing is reported as 0 files, not 1.
    files = [line for line in s3_ls(s3_path) if line.strip()]
    print(f"Found {len(files)} files in {s3_path}")

    for line in files[:5]:  # Show first 5
        print(f"  {line}")

    return files

In [None]:
# Example: Check fills for a recent date
# download_node_fills('20251101', hour=12)

## Decompress and Parse

In [None]:
def decompress_lz4(filepath):
    """Read an LZ4-frame file from disk and return its decompressed bytes."""
    compressed = Path(filepath).read_bytes()
    return lz4.frame.decompress(compressed)

def parse_msgpack(data):
    """Deserialize a MessagePack payload into Python objects.

    ``raw=False`` is passed so MessagePack strings come back as ``str``.
    """
    unpacked = msgpack.unpackb(data, raw=False)
    return unpacked

def parse_jsonl(data):
    """Parse JSONL (newline-delimited JSON) bytes into a list of objects.

    Args:
        data: Bytes holding one JSON document per line. Decoded with the
            default codec of ``bytes.decode()`` (UTF-8).

    Returns:
        list: One parsed value per non-blank line, in input order. Empty or
        whitespace-only input yields an empty list.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        UnicodeDecodeError: If ``data`` is not valid UTF-8.
    """
    # Split on '\n' only (not str.splitlines): JSON strings may legally
    # contain raw Unicode line separators such as U+2028.
    lines = data.decode().split('\n')
    # Skip blank lines so empty input and stray empty lines do not reach
    # json.loads(''), which raises JSONDecodeError.
    return [json.loads(line) for line in lines if line.strip()]

def load_explorer_block(filepath):
    """Decompress a .rmp.lz4 explorer block file and decode its MessagePack payload."""
    return parse_msgpack(decompress_lz4(filepath))

def load_node_fills(filepath):
    """Decompress a node_fills .lz4 file and parse its JSONL payload."""
    return parse_jsonl(decompress_lz4(filepath))

In [None]:
# Example usage:
# blocks = load_explorer_block('../hyperliquid_samples/explorer_blocks/811681900.rmp.lz4')
# print(f"Loaded {len(blocks)} blocks")