# Analysis Pipeline

Analyze local `node_fills_by_block` data.

---

## Setup

In [36]:
import json
import pandas as pd
from pathlib import Path
from datetime import datetime

pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

SAMPLES_DIR = Path('../hyperliquid_samples/hl-mainnet-node-data/node_fills_by_block')

---

## Load Local Fills Data

In [37]:
# Load all sample JSON files
fills = []
for path in SAMPLES_DIR.glob('*.json'):
    with open(path) as f:
        fills.extend(json.load(f))

print(f"Loaded {len(fills):,} fills from {len(list(SAMPLES_DIR.glob('*.json')))} files")

Loaded 2,000 fills from 2 files


In [38]:
# Convert to DataFrame with proper types
df = pd.DataFrame(fills)

# Parse types
df['time'] = pd.to_datetime(df['time'], unit='ms')
df['px'] = pd.to_numeric(df['px'])
df['sz'] = pd.to_numeric(df['sz'])
df['closedPnl'] = pd.to_numeric(df['closedPnl'], errors='coerce').fillna(0)
df['fee'] = pd.to_numeric(df['fee'], errors='coerce').fillna(0)

# Add computed columns
df['volume'] = df['px'] * df['sz']
df['is_maker'] = ~df['crossed']
df['is_close'] = df['dir'].str.startswith('Close')
df['is_win'] = (df['closedPnl'] > 0) & df['is_close']

print(f"DataFrame shape: {df.shape}")
df.head()

DataFrame shape: (2000, 24)


Unnamed: 0,coin,px,sz,side,time,startPosition,dir,closedPnl,hash,oid,crossed,fee,tid,feeToken,twapId,user,block_time,cloid,builderFee,builder,volume,is_maker,is_close,is_win
0,SOL,186.1,0.07,B,2025-11-01 12:00:00.072,129.84,Open Long,0.0,0x00000000000000000000000000000000000000000000...,219188562384,True,0.005569,574880777843654,USDC,1323168.0,0x2f79e7993359e37091f8298c9706c75243da65a5,2025-11-01T12:00:00.072708858,,,,13.027,False,False,False
1,SOL,186.1,0.07,A,2025-11-01 12:00:00.072,23.29,Close Long,0.005327,0x00000000000000000000000000000000000000000000...,219188406659,False,-0.00026,574880777843654,USDC,,0xa880d6cc607a05ea617307ab3b0d335e8d8424ee,2025-11-01T12:00:00.072708858,0x00000000000000000000000001c11135,,,13.027,True,True,True
2,kLUNC,0.043634,727.0,B,2025-11-01 12:00:00.072,93324.0,Open Long,0.0,0xd1bb92f72debcd68d335042ea03c6b02010f00dcc8ee...,219188347328,False,-0.000317,310875290034250,USDC,,0x7717a7a245d9f950e586822b8c9b46863ed7bd7e,2025-11-01T12:00:00.072708858,0x3a46f6a9f96fe583be081c8f6f04205d,,,31.721918,True,False,False
3,kLUNC,0.043634,727.0,A,2025-11-01 12:00:00.072,2796963.0,Close Long,-0.292254,0xd1bb92f72debcd68d335042ea03c6b02010f00dcc8ee...,219188563177,True,0.0,310875290034250,USDC,,0x31ca8395cf837de08b24da3f660e77761dfb974b,2025-11-01T12:00:00.072708858,,,,31.721918,False,True,False
4,MEME,0.001559,9906.0,B,2025-11-01 12:00:00.273,52469471.0,Open Long,0.0,0x52a54788e5cf05c5541f042ea03c6e0205ee006e80c2...,219188565409,True,0.0,557384256234142,USDC,,0x31ca8395cf837de08b24da3f660e77761dfb974b,2025-11-01T12:00:00.273834313,,,,15.443454,False,False,False


In [39]:
# Schema overview
print("Column types:")
print(df.dtypes)

Column types:
coin                     object
px                      float64
sz                      float64
side                     object
time             datetime64[ns]
startPosition            object
dir                      object
closedPnl               float64
hash                     object
oid                       int64
crossed                    bool
fee                     float64
tid                       int64
feeToken                 object
twapId                  float64
user                     object
block_time               object
cloid                    object
builderFee               object
builder                  object
volume                  float64
is_maker                   bool
is_close                   bool
is_win                     bool
dtype: object


---

## Analysis 1: Volume by Trader

In [40]:
volume_by_trader = df.groupby('user').agg(
    volume=('volume', 'sum'),
    trades=('volume', 'count')
).sort_values('volume', ascending=False)

print("Top 10 Traders by Volume:")
volume_by_trader.head(10)

Top 10 Traders by Volume:


Unnamed: 0_level_0,volume,trades
user,Unnamed: 1_level_1,Unnamed: 2_level_1
0xb4321b142b2a03ce20fcab2007ff6990b9acba93,65641.241432,59
0x31ca8395cf837de08b24da3f660e77761dfb974b,55475.397371,116
0xecb63caa47c7c4e77f60f1ce858cf28dc2b82b00,54564.314283,20
0x010461c14e146ac35fe42271bdc1134ee31c703a,52130.626872,108
0xb356e5977ae1be0e47f7f44557acf0170e8d0512,42168.9422,12
0xb6a766f531fa8e222f460df11d62b0f84b7b65f3,39284.742,6
0x0fd468a73084daa6ea77a9261e40fdec3e67e0c7,38580.03301,28
0xc926ddba8b7617dbc65712f20cf8e1b58b8598d3,36310.77639,161
0x9e74a6a1df3c2545ec4e8e54a1502967c7ad15e1,36301.50796,18
0x335f45392f8d87745aaae68f5c192849afd9b60e,28240.75903,17


---

## Analysis 2: PnL by Trader

In [41]:
pnl_by_trader = df.groupby('user').agg(
    realized_pnl=('closedPnl', 'sum'),
    fees=('fee', 'sum')
).assign(
    net_pnl=lambda x: x['realized_pnl'] - x['fees']
).sort_values('net_pnl', ascending=False)

print("Top 10 Traders by Net PnL:")
pnl_by_trader.head(10)

Top 10 Traders by Net PnL:


Unnamed: 0_level_0,realized_pnl,fees,net_pnl
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0xfcc001bc1db0db6fec6cb5c664b39b3a2224cda2,2109.049459,0.688439,2108.361019
0xb83de012dba672c76a7dbbbf3e459cb59d7d6e36,1205.134485,4.198478,1200.936007
0xb4321b142b2a03ce20fcab2007ff6990b9acba93,309.735126,2.829663,306.905463
0xef88c427254241fcf98466cbc58150c9b5bfb734,248.121537,0.68418,247.437357
0xf3caec115dc53edbcdc5166fc8942b50cfd4623a,251.563044,8.888159,242.674885
0x6c2e4912a03c663eaa74a0bd0bbc3bc4c7c3d43d,111.954948,0.363002,111.591946
0x5b81b715eb5fdc8943cdb15b517d39ef90f2297d,90.71856,0.168658,90.549902
0xee162a5a60829bd346f0c1ac3514b21fe5f4b290,81.238115,1.615664,79.622451
0xc3eab700281a4416b082b6b896c63d5e4781f66f,69.557078,0.047227,69.509851
0xea73db6c22ca00a06b02be3ee4a37a68c55ee73c,58.37467,0.200988,58.173683


---

## Analysis 3: Maker vs Taker Ratio

In [42]:
maker_ratio = df.groupby('user').agg(
    total_trades=('is_maker', 'count'),
    maker_trades=('is_maker', 'sum')
).assign(
    maker_pct=lambda x: x['maker_trades'] / x['total_trades'] * 100
).sort_values('maker_pct', ascending=False)

print("Top 10 Traders by Maker %:")
maker_ratio[maker_ratio['total_trades'] >= 10].head(10)

Top 10 Traders by Maker %:


Unnamed: 0_level_0,total_trades,maker_trades,maker_pct
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0x8b2bb0c842cd3737849079723105fc11da453f91,31,31,100.0
0x95ec5e928c0a71815ec6ec148991a5a607b9b88a,21,21,100.0
0x9e74a6a1df3c2545ec4e8e54a1502967c7ad15e1,18,18,100.0
0x5b81b715eb5fdc8943cdb15b517d39ef90f2297d,23,23,100.0
0xdacb0c5b92766b286db282ea42b3d7ebdc2423f9,14,14,100.0
0xf0480a7892bc260658e82359f3c9374bc0a75444,45,45,100.0
0xbf1935fe7ab6d0aa3ee8d3da47c2f80e215b2a1c,26,26,100.0
0xc37b5d9a854de08c5d74522b726c316833e6a047,10,10,100.0
0x335f45392f8d87745aaae68f5c192849afd9b60e,17,17,100.0
0x4129c62faf652fea61375dcd9ca8ce24b2bb8b95,12,12,100.0


---

## Analysis 4: Win Rate

In [43]:
# Only consider closing trades
closes = df[df['is_close']]

win_rate = closes.groupby('user').agg(
    total_closes=('is_win', 'count'),
    wins=('is_win', 'sum')
).assign(
    win_rate=lambda x: x['wins'] / x['total_closes'] * 100
).sort_values('win_rate', ascending=False)

print("Top 10 Traders by Win Rate (min 5 closes):")
win_rate[win_rate['total_closes'] >= 5].head(10)

Top 10 Traders by Win Rate (min 5 closes):


Unnamed: 0_level_0,total_closes,wins,win_rate
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0xb83de012dba672c76a7dbbbf3e459cb59d7d6e36,9,9,100.0
0xa3652867632777bd42f12561d28bfd2064a11721,5,5,100.0
0xf0480a7892bc260658e82359f3c9374bc0a75444,22,22,100.0
0xef88c427254241fcf98466cbc58150c9b5bfb734,5,5,100.0
0xdacb0c5b92766b286db282ea42b3d7ebdc2423f9,5,5,100.0
0xbf1935fe7ab6d0aa3ee8d3da47c2f80e215b2a1c,9,9,100.0
0x335f45392f8d87745aaae68f5c192849afd9b60e,17,17,100.0
0xee162a5a60829bd346f0c1ac3514b21fe5f4b290,34,32,94.117647
0x0a06ec6754b628be489b2c40bba20c8580392a7b,11,10,90.909091
0x223537ac9a856c31f4043e86ced86bb29f06653e,9,8,88.888889


---

## Analysis 5: Volume by Coin

In [44]:
volume_by_coin = df.groupby('coin').agg(
    volume=('volume', 'sum'),
    trades=('volume', 'count'),
    unique_traders=('user', 'nunique')
).sort_values('volume', ascending=False)

print("Top 10 Coins by Volume:")
volume_by_coin.head(10)

Top 10 Coins by Volume:


Unnamed: 0_level_0,volume,trades,unique_traders
coin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ETH,293356.07424,128,56
BTC,197954.32666,268,72
ZEC,128987.7804,58,14
xyz:META,58090.98776,292,12
@188,46452.192315,18,5
TAO,32222.97236,20,9
@107,31109.42962,34,10
HYPE,30711.70624,144,71
MON,27994.632108,46,25
@254,25847.339802,156,25


---

## Analysis 6: Trader Profile (Combined Metrics)

In [45]:
def trader_profile(df):
    closes = df[df['is_close']]
    
    profile = df.groupby('user').agg(
        volume=('volume', 'sum'),
        trades=('volume', 'count'),
        realized_pnl=('closedPnl', 'sum'),
        fees=('fee', 'sum'),
        maker_trades=('is_maker', 'sum'),
        coins_traded=('coin', 'nunique')
    )
    
    # Add win rate from closes
    win_stats = closes.groupby('user').agg(
        total_closes=('is_win', 'count'),
        wins=('is_win', 'sum')
    )
    
    profile = profile.join(win_stats)
    profile['net_pnl'] = profile['realized_pnl'] - profile['fees']
    profile['maker_pct'] = profile['maker_trades'] / profile['trades'] * 100
    profile['win_rate'] = profile['wins'] / profile['total_closes'] * 100
    
    return profile.sort_values('net_pnl', ascending=False)

profiles = trader_profile(df)
print("Trader Profiles (Top 10 by Net PnL):")
profiles[['volume', 'trades', 'net_pnl', 'maker_pct', 'win_rate', 'coins_traded']].head(10)

Trader Profiles (Top 10 by Net PnL):


Unnamed: 0_level_0,volume,trades,net_pnl,maker_pct,win_rate,coins_traded
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0xfcc001bc1db0db6fec6cb5c664b39b3a2224cda2,2109.18914,1,2108.361019,100.0,,1
0xb83de012dba672c76a7dbbbf3e459cb59d7d6e36,19992.77293,9,1200.936007,0.0,100.0,1
0xb4321b142b2a03ce20fcab2007ff6990b9acba93,65641.241432,59,306.905463,18.644068,83.333333,9
0xef88c427254241fcf98466cbc58150c9b5bfb734,1583.756037,5,247.437357,0.0,100.0,1
0xf3caec115dc53edbcdc5166fc8942b50cfd4623a,22220.397163,8,242.674885,100.0,,1
0x6c2e4912a03c663eaa74a0bd0bbc3bc4c7c3d43d,2160.732356,4,111.591946,0.0,66.666667,3
0x5b81b715eb5fdc8943cdb15b517d39ef90f2297d,992.105276,23,90.549902,100.0,,1
0xee162a5a60829bd346f0c1ac3514b21fe5f4b290,20063.065064,38,79.622451,31.578947,94.117647,8
0xc3eab700281a4416b082b6b896c63d5e4781f66f,122.987931,3,69.509851,100.0,,1
0xea73db6c22ca00a06b02be3ee4a37a68c55ee73c,558.29904,1,58.173683,100.0,,1


---

## Summary Statistics

In [46]:
print("Dataset Summary")
print("=" * 40)
print(f"Total fills: {len(df):,}")
print(f"Unique traders: {df['user'].nunique():,}")
print(f"Unique coins: {df['coin'].nunique():,}")
print(f"Total volume: ${df['volume'].sum():,.0f}")
print(f"Date range: {df['time'].min()} to {df['time'].max()}")

Dataset Summary
Total fills: 2,000
Unique traders: 421
Unique coins: 112
Total volume: $1,183,044
Date range: 2025-11-01 12:00:00.072000 to 2025-11-28 12:00:02.814000
