
# ⚡ Latency Simulator — Tail-at-Scale & Trading Impact

_Date generated: 2025-09-03_

This notebook simulates **end-to-end order latency**, queueing effects, and the **P&L impact** of latency tails.

**What you get**
- Synthetic latency pipeline (network → gateway → risk → venue ACK)
- **Microburst traffic** (Poisson arrivals), lognormal service times
- Queueing models: **M/M/1** response vs utilization
- SLO tracking (e.g., **p99 < 50 ms**), error-budget burn
- Fill-loss and alpha-decay model → dollar cost of latency
- A/B scenarios: baseline vs **co-location optimization**


## 0) Parameters

In [None]:

# Number of simulated order requests
N_REQUESTS = 50_000

# Per-stage latency distributions: lognormal, given as (mean in ms, sigma of the
# underlying normal). One constant per line so each can be tweaked on its own.
NET_MEAN_MS = 2.5
NET_SIGMA = 0.35
GW_MEAN_MS = 1.5
GW_SIGMA = 0.30
RISK_MEAN_MS = 3.0
RISK_SIG = 0.40
VENUE_MEAN_MS = 5.0
VENUE_SIG = 0.45

# Arrival process
LAMBDA_RPS = 600     # average requests per second
BURST_FACTOR = 3.0   # rate multiplier applied during a burst second
BURST_PROB = 0.08    # probability that a given second is a burst second

# SLO: the SLO_PERC-th latency percentile must stay below SLO_THRESH_MS
SLO_PERC = 99.0
SLO_THRESH_MS = 50.0

# Cost model
ALPHA_DECAY_BP_PER_MS = 0.002   # bp of edge lost per extra ms vs competitor
COMPETITOR_LAT_MS = 10.0        # fixed competitor latency (for model)
NOTIONAL_PER_ORDER = 50_000     # $ notional per order
FILL_SENSITIVITY = 0.015        # prob of losing fill per extra ms (soft)

# Seed for the shared random generator
SEED = 7


## 1) Setup & Helpers

In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Shared random generator seeded with SEED; the sampling code below draws from it,
# so re-running the notebook top to bottom reproduces the same numbers.
rng = np.random.default_rng(SEED)

def lognormal_ms(mean_ms, sigma, size=None):
    """Draw lognormal latencies (ms) whose arithmetic mean equals ``mean_ms``.

    Parameters
    ----------
    mean_ms : float
        Desired mean of the lognormal distribution, in milliseconds.
    sigma : float
        Standard deviation of the underlying normal distribution.
    size : int or tuple of int, optional
        Output shape. ``None`` (the default) returns a single draw, exactly as
        before; an integer or shape returns an array drawn in one batched call.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        One sample when ``size`` is ``None``, otherwise an array of samples.
    """
    # For a lognormal, mean = exp(mu + 0.5*sigma^2)  =>  mu = ln(mean) - 0.5*sigma^2
    mu = np.log(mean_ms) - 0.5 * (sigma ** 2)
    return np.exp(rng.normal(mu, sigma, size=size))

def percentile(x, p):
    """Return the ``p``-th percentile of ``x`` (``p`` on a 0-100 scale).

    Thin convenience wrapper that forwards both arguments to ``np.percentile``.
    """
    value = np.percentile(a=x, q=p)
    return value

def series_percentiles(x: pd.Series, ps=(50,95,99)):
    """Compute several percentiles of ``x`` at once.

    Returns a dict keyed ``"p<level>"`` (e.g. ``"p50"``, ``"p99.9"``) in the
    order the levels appear in ``ps``; each value comes from ``np.percentile``.
    """
    result = {}
    for level in ps:
        result[f"p{level}"] = np.percentile(x, level)
    return result

def rolling_percentile(x: pd.Series, p=99, win=500):
    """Return the ``p``-th percentile of ``x`` over a rolling window of ``win`` rows.

    Positions whose window is not yet full come back as NaN (pandas' default
    for a fixed-size rolling window).
    """
    def _window_percentile(values):
        # `values` is a plain ndarray because apply() is called with raw=True
        return np.percentile(values, p)

    windows = x.rolling(win)
    return windows.apply(_window_percentile, raw=True)

def mm1_response_time(lambda_rate, mu_rate):
    """Mean response time of an M/M/1 queue and its utilization.

    Uses R = 1 / (mu - lambda) with utilization rho = lambda / mu.
    Returns ``(R, rho)``; when rho >= 1 the queue is unstable and R is
    reported as ``np.inf``.
    """
    utilization = lambda_rate / mu_rate
    saturated = utilization >= 1.0
    response = np.inf if saturated else 1.0 / (mu_rate - lambda_rate)
    return response, utilization


## 2) Generate Requests with Microbursts

In [None]:

# Assign each request to a second; some seconds are bursty
n_seconds = int(np.ceil(N_REQUESTS / LAMBDA_RPS)) + 5
is_burst = rng.random(n_seconds) < BURST_PROB
per_sec_rate = np.where(is_burst, LAMBDA_RPS * BURST_FACTOR, LAMBDA_RPS)

# Draw the number of arrivals in each second
counts = rng.poisson(per_sec_rate)

# If the Poisson draws fall short of N_REQUESTS, pad the first second so that
# at least N_REQUESTS arrivals exist before truncation.
shortfall = N_REQUESTS - int(counts.sum())
if shortfall > 0:
    counts[0] += shortfall

# Expand the per-second counts into one entry per request. Every second is
# expanded, including second 0 which carries the padding, so no arrivals are lost.
# Seconds are numbered from 0.
second_of_request = np.repeat(np.arange(n_seconds), counts)

# Uniform arrival offset within each request's second, then order by time
ts = np.sort(second_of_request + rng.random(second_of_request.size))

# Keep the earliest N_REQUESTS arrivals
t = pd.Series(ts[:N_REQUESTS], name="arrival_sec")
t.index.name = "request_id"
t.head(), len(t)


## 3) Draw Component Latencies & Compose E2E

In [None]:

def draw_pipeline(n, params):
    """Sample per-stage latencies for ``n`` requests and sum them end to end.

    Parameters
    ----------
    n : int
        Number of requests (rows) to simulate.
    params : dict
        Maps a stage column name to a ``(mean_ms, sigma)`` pair describing that
        stage's lognormal latency distribution.

    Returns
    -------
    pandas.DataFrame
        One column per stage, plus ``latency_ms`` holding the row-wise sum of
        all stage columns.
    """
    comps = {}
    for name, (mean_ms, sigma) in params.items():
        # Same mean-matching parameterisation as lognormal_ms
        # (mu = ln(mean) - 0.5*sigma^2), but drawn as one vectorised batch
        # instead of n separate Python-level scalar draws.
        mu = np.log(mean_ms) - 0.5 * (sigma ** 2)
        comps[name] = np.exp(rng.normal(mu, sigma, size=n))
    df = pd.DataFrame(comps)
    df["latency_ms"] = df.sum(axis=1)
    return df

# Stage column name -> (mean ms, sigma) of that stage's lognormal latency
params = dict(
    net_ms=(NET_MEAN_MS, NET_SIGMA),
    gw_ms=(GW_MEAN_MS, GW_SIGMA),
    risk_ms=(RISK_MEAN_MS, RISK_SIG),
    venue_ms=(VENUE_MEAN_MS, VENUE_SIG),
)

# Draw the stage latencies, attach the arrival timestamps, and order rows by arrival
lat = draw_pipeline(N_REQUESTS, params)
lat["arrival_sec"] = t.values
lat = lat.sort_values(by="arrival_sec")
lat = lat.reset_index(drop=True)
lat.head()


## 4) Latency Stats & Charts

In [None]:

# Headline percentiles of the end-to-end latency, keyed "p50" ... "p99.9"
p = series_percentiles(
    lat["latency_ms"],
    ps=(50, 90, 95, 99, 99.9),
)
p


In [None]:

# Figure 1: histogram of end-to-end latency
plt.figure(figsize=(10, 3.5))
lat["latency_ms"].hist(bins=100)
plt.title("E2E Latency Histogram (ms)")
plt.tight_layout()
plt.show()

# Figure 2: empirical CDF, with sorted latencies on the x-axis against evenly
# spaced cumulative fractions from 0 to 1 on the y-axis
plt.figure(figsize=(10, 3.5))
cdf = np.sort(lat["latency_ms"].to_numpy())
y = np.linspace(0, 1, num=len(cdf))
plt.plot(cdf, y)
plt.title("Latency CDF")
plt.tight_layout()
plt.show()


## 5) SLO Tracking & Error-Budget Burn

In [None]:

win = 1000
# Rolling SLO percentile of end-to-end latency; NaN until the window is full
p99 = rolling_percentile(lat["latency_ms"], p=SLO_PERC, win=win) # type: ignore

# Breach indicator: 1.0 where the rolling percentile exceeds the SLO threshold,
# 0.0 where it does not. Rows where the rolling percentile is still NaN (warm-up)
# are kept as NaN; comparing NaN to the threshold yields False, which would
# silently count those rows as "SLO met" and bias the breach rate low.
slo_breach = (p99 > SLO_THRESH_MS).astype(float).where(p99.notna())

plt.figure(figsize=(10,3.5))
plt.plot(p99.index, p99.values) # type: ignore
plt.axhline(SLO_THRESH_MS, linestyle='--')
plt.title(f"Rolling p{int(SLO_PERC)} (window={win}) vs SLO")
plt.tight_layout(); plt.show()

# Breach rate over the trailing 2000 requests. A fixed-size rolling mean needs a
# full window of non-NaN values, so windows overlapping the warm-up stay NaN.
burn = slo_breach.rolling(2000).mean()
plt.figure(figsize=(10,3))
plt.plot(burn.index, burn.values) # type: ignore
plt.title("SLO Breach Rate (rolling)")
plt.tight_layout(); plt.show()


## 6) M/M/1 Response Time vs Utilization

In [None]:

# Treat the gateway stage as the bottleneck: its mean service time in ms is
# converted to a service rate in requests per second.
service_ms = lat["gw_ms"].mean()
mu_rate = 1000.0 / service_ms   # per second

# Sweep utilization and evaluate the M/M/1 mean response time at each point;
# the model returns seconds, so scale to ms.
util = np.linspace(0.1, 0.99, 50)
resp = [
    mm1_response_time(rho * mu_rate, mu_rate)[0] * 1000.0
    for rho in util
]

plt.figure(figsize=(10, 3.5))
plt.plot(util, resp)
plt.title("M/M/1 Mean Response Time vs Utilization (gateway stage)")
plt.xlabel("Utilization ρ")
plt.ylabel("Response Time (ms)")
plt.tight_layout()
plt.show()


## 7) Cost of Latency — Fill Loss & Alpha Decay

In [None]:

# Extra latency relative to the competitor; being faster is floored at 0
delta_ms = (lat["latency_ms"] - COMPETITOR_LAT_MS).clip(lower=0)

# Probability of losing the fill grows with the extra latency (exponential form)
p_lose = 1 - np.exp(-FILL_SENSITIVITY * delta_ms)

# Alpha given up (in bp) when the order does fill
alpha_lost_bp = ALPHA_DECAY_BP_PER_MS * delta_ms

# Expected $ cost per order = P(lose) * loss-on-miss + P(fill) * alpha decay.
# NOTE(review): the 0.5 is applied to NOTIONAL_PER_ORDER as a plain fraction,
# while the alpha term is converted from bp via /10000 — confirm the intended
# units of the 0.5.
lost_fill_term = p_lose * 0.5
alpha_decay_term = (1 - p_lose) * (alpha_lost_bp / 10000.0)
expected_cost = (lost_fill_term + alpha_decay_term) * NOTIONAL_PER_ORDER

lat["delta_ms"] = delta_ms
lat["p_lose"] = p_lose
lat["cost_$"] = expected_cost

preview_cols = ["latency_ms", "delta_ms", "p_lose", "cost_$"]
lat[preview_cols].head()


In [None]:

# Scatter of latency against expected cost, plotting every 200th order
plt.figure(figsize=(10, 3.5))
plt.scatter(
    lat["latency_ms"].iloc[::200],
    lat["cost_$"].iloc[::200],
    s=8,
)
plt.title("Latency vs Expected Cost per Order ($)")
plt.tight_layout()
plt.show()

# Total expected cost across all simulated orders
total_cost = lat["cost_$"].sum()
total_cost


## 8) A/B Scenario — Co-Location Optimization

In [None]:

def apply_colo(df: pd.DataFrame, net_factor=0.4, venue_factor=0.8):
    """Return a copy of ``df`` with the network and venue stages scaled down.

    ``net_ms`` is multiplied by ``net_factor`` and ``venue_ms`` by
    ``venue_factor``; ``latency_ms`` is then recomputed as the sum of the four
    stage columns. The input frame is left untouched.
    """
    stage_cols = ["net_ms", "gw_ms", "risk_ms", "venue_ms"]
    scaled = df.assign(
        net_ms=df["net_ms"] * net_factor,
        venue_ms=df["venue_ms"] * venue_factor,
    )
    scaled["latency_ms"] = scaled[stage_cols].sum(axis=1)
    return scaled

# Co-located scenario: shrink network and venue latency, then re-run the cost model
lat_colo = apply_colo(lat, net_factor=0.35, venue_factor=0.75)
delta_ms2 = np.clip(lat_colo["latency_ms"] - COMPETITOR_LAT_MS, 0, None)
p_lose2 = 1 - np.exp(-FILL_SENSITIVITY * delta_ms2)
expected_cost2 = (p_lose2 * 0.5 + (1 - p_lose2) * (ALPHA_DECAY_BP_PER_MS * delta_ms2/10000.0)) * NOTIONAL_PER_ORDER

# apply_colo starts from a copy of `lat`, so lat_colo still carries the baseline
# delta_ms / p_lose columns. Overwrite every derived column, not just the cost,
# so the co-lo frame is internally consistent.
lat_colo["delta_ms"] = delta_ms2
lat_colo["p_lose"] = p_lose2
lat_colo["cost_$"] = expected_cost2

# Total expected cost under each scenario and the saving from co-location
base_cost = lat["cost_$"].sum()
colo_cost = lat_colo["cost_$"].sum()
savings = base_cost - colo_cost
{"base_total_cost_$": float(base_cost), "colo_total_cost_$": float(colo_cost), "savings_$": float(savings)}


In [None]:

# Latency density, baseline vs co-located, as overlaid KDE curves
plt.figure(figsize=(10, 3.5))
lat["latency_ms"].plot.kde(label="Base")
lat_colo["latency_ms"].plot.kde(label="Co-lo")
plt.title("Latency Density: Base vs Co-lo")
plt.legend()
plt.tight_layout()
plt.show()

# Per-order expected cost, baseline vs co-located, as overlaid histograms.
# Each call bins its own data into 80 bins, so the two sets of bin edges differ.
plt.figure(figsize=(10, 3.5))
for frame, label in ((lat, "Base"), (lat_colo, "Co-lo")):
    plt.hist(frame["cost_$"], bins=80, alpha=0.6, label=label)
plt.title("Per-Order Expected Cost Distribution")
plt.legend()
plt.tight_layout()
plt.show()


## 9) Export Summary

In [None]:

import os

# Latency percentiles first, then the cost totals, in the same order as the labels
metric_names = ["p50_ms","p95_ms","p99_ms","p999_ms","total_cost_$","total_cost_colo_$","savings_$"]
latency_values = [np.percentile(lat["latency_ms"], q) for q in (50, 95, 99, 99.9)]
cost_values = [base_cost, colo_cost, savings]
summary = pd.DataFrame({
    "metric": metric_names,
    "value": latency_values + cost_values,
})

# Write the summary as CSV, creating the output directory if it does not exist
out_path = "reports/latency_summary.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
summary.to_csv(out_path, index=False)
out_path
