# From Square-Root Law to Dynamic Trade Effects

## 1. The dataset

Collected from the Binance cryptocurrency API via the binance-LOB repository (https://github.com/pfei-sa/binance-LOB/tree/main).

Quotes data with a depth of 100 levels into the limit order book (LOB): timestamp, ask price, ask volume, bid price, bid volume, midpoint, spread.

Trades data with: timestamp, price, volume, trade sign (-1 = sell, 1 = buy).

In [None]:
# Fetch LOB data from Binance 
# https://github.com/pfei-sa/binance-LOB.git

import os
os.chdir('/home/smn/repos/fintech_price_impact/binance-LOB/')
from replay import orderbook_generator, get_snapshots_update_ids
import pandas as pd
import numpy as np

# NOTE(review): the result of this call is not used anywhere in this cell.
snapshot_ids = get_snapshots_update_ids("BTCUSDT")

lob_depth = 100    # number of price levels kept per side in every row
batch_size = 1000  # number of order book snapshots written per CSV file


def _pad_levels(book_side, depth):
    """Return exactly `depth` (price, volume) pairs for one side of the book.

    Levels beyond `depth` are dropped. Missing levels are filled with NaN for
    both price and volume, so every row ends up with the same columns.
    """
    levels = list(book_side.items())[:depth]
    levels.extend([(np.nan, np.nan)] * (depth - len(levels)))
    return levels


def _orderbook_to_row(orderbook, depth):
    """Flatten one order book snapshot into a single dict (one CSV row).

    Columns, in order: timestamp, ask price/volume per level, bid price/volume
    per level, midpoint, spread.
    """
    asks = _pad_levels(orderbook.asks, depth)
    bids = _pad_levels(orderbook.bids, depth)

    row = {'timestamp': orderbook.timestamp}
    for level, (price, volume) in enumerate(asks):
        row[f'ask_price_depth_{level}'] = price
        row[f'ask_volume_depth_{level}'] = volume
    for level, (price, volume) in enumerate(bids):
        row[f'bid_price_depth_{level}'] = price
        row[f'bid_volume_depth_{level}'] = volume

    # Assumes orderbook.asks / orderbook.bids iterate best price first
    # -- TODO confirm against replay.py.
    # An empty side yields NaN here instead of raising IndexError.
    best_ask = asks[0][0] if asks else np.nan
    best_bid = bids[0][0] if bids else np.nan
    row['midpoint'] = (best_ask + best_bid) / 2
    row['spread'] = best_ask - best_bid
    return row


def _write_batch(rows, batch_index):
    """Write one batch of rows to `batch_<batch_index>.csv`."""
    pd.DataFrame(rows).to_csv(f'batch_{batch_index}.csv', index=False)


# Build db structure from order book.
# Each iteration produces one new row.
batch_data = []
batch_index = 0  # dedicated counter so each batch gets its own file name

for orderbook in orderbook_generator(0, "BTCUSDT", block_size=5000):
    batch_data.append(_orderbook_to_row(orderbook, lob_depth))

    if len(batch_data) >= batch_size:
        _write_batch(batch_data, batch_index)
        batch_index += 1
        batch_data = []

# Flush the final, partially filled batch so trailing snapshots are not lost.
if batch_data:
    _write_batch(batch_data, batch_index)
    batch_index += 1
    batch_data = []

[]


In [5]:
from replay import diff_depth_stream_generator

# NOTE(review): the result of this call is not used anywhere in this cell.
snapshot_ids = get_snapshots_update_ids("BTCUSDT")

# NOTE(review): the rows below are built from order book depth diffs and are
# stored under "trade_*" column names. Presumably a depth diff is a change in
# resting quantity at a price level rather than an executed trade -- confirm
# that this proxy is intended before using it as trade data.
trades = []

for diff_data in diff_depth_stream_generator(0, 'BTCUSDT', block_size=5000):
    (
        timestamp,
        first_update_id,
        final_update_id,
        diff_bids_volume,
        diff_bids_price,
        diff_asks_volume,
        diff_asks_price,
        symbol,
    ) = diff_data

    # Bid-side updates are tagged -1 and ask-side updates +1; bids are
    # emitted before asks within each diff.
    sides = (
        (diff_bids_price, diff_bids_volume, -1),
        (diff_asks_price, diff_asks_volume, 1),
    )
    for side_prices, side_volumes, sign in sides:
        for price, quantity in zip(side_prices, side_volumes):
            record = {
                'timestamp': timestamp,
                'trade_price': price,
                'trade_volume': abs(quantity),
                'trade_sign': sign,
                'update_id': final_update_id,
            }
            trades.append(record)

# Persist every collected row, then preview the first few in the notebook.
trades_df = pd.DataFrame(trades)
trades_df.to_csv('trades.csv', index=False)
trades_df.head()


Unnamed: 0,timestamp,trade_price,trade_volume,trade_sign,update_id
0,2025-08-01 08:33:46.014,114474.37,3.485,-1,73833846823
1,2025-08-01 08:33:46.014,114471.57,0.31466,-1,73833846823
2,2025-08-01 08:33:46.014,114471.32,0.0,-1,73833846823
3,2025-08-01 08:33:46.014,114471.22,0.1801,-1,73833846823
4,2025-08-01 08:33:46.014,114471.02,5e-05,-1,73833846823


# Data analysis

## 1. Baseline Implementation
- Square-Root Law: ΔP = Y σ√(Q/V)
- Parameter estimation and statistical validation
- Identify systematic deviations and failure modes

## 2. Traditional Model Extensions
- Propagator Model: P(t) = ∑G(t-s)ε(s)
- Temporal impact analysis (temporary vs permanent)
- Trade information content effects

## 3. ML Implementation
- Features: Volatility metrics, volume patterns, spreads, order book imbalance, decay patterns from propagator analysis
- Target: Direct price impact ΔP prediction
- Models: Compare ML predictions vs Square-root law vs Propagator model

## 4. Regime-Dependent Analysis
- Identify market conditions where traditional models underperform
- Cross-validation framework across different market regimes
- Performance comparison metrics focusing on model failure cases