# Benchmark Stress Test


In [24]:
import random
import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timezone

import httpx
import pandas as pd
import plotly.express as px

## Fit Benchmark


In [17]:
# Config
# Settings for the fit benchmark: target service, concurrency and timeout.
BASE_URL = "http://localhost:8000"
ENDPOINT = "/fit/{series_id}"  # formatted with each call's series id

WORKERS = 100  # max_workers of the thread pool
TOTAL_CALLS = 100  # number of fit requests submitted
REQUEST_TIMEOUT_SECONDS = 60.0  # timeout handed to httpx.Client

# Echo the effective settings so they are stored next to the results.
print(
    dict(
        base_url=BASE_URL,
        workers=WORKERS,
        total_calls=TOTAL_CALLS,
        timeout_s=REQUEST_TIMEOUT_SECONDS,
    )
)

{'base_url': 'http://localhost:8000', 'workers': 100, 'total_calls': 100, 'timeout_s': 60.0}


In [18]:
# Shared structures
results = list()  # one row dict per finished call, appended by call_train
results_lock = threading.Lock()  # held while appending to `results`
# UTC timestamp tag that call_train embeds in every series id
run_id = f"{datetime.now(timezone.utc):%Y%m%d_%H%M%S}"

def build_payload(seed: int, n_points: int = 6, jitter: float = 0.8) -> dict:
    """Build a small training payload: increasing timestamps + non-constant values.

    Timestamps are consecutive integers starting at ``1700000000 + seed * 10``.
    Values are drawn uniformly from ``base +/- jitter`` where
    ``base = 10.0 + (seed % 5)``. The draws use the module-level ``random``
    generator, so values are only reproducible if the caller seeds it first.

    Args:
        seed: Index that shifts the timestamps and selects the base level.
        n_points: Number of (timestamp, value) pairs to generate; at least 2.
        jitter: Half-width of the uniform noise added to the base level.

    Returns:
        ``{"timestamps": [...], "values": [...]}`` with ``n_points`` entries each.

    Raises:
        ValueError: If ``n_points`` is below 2, since such a series cannot be
            non-constant.
    """
    if n_points < 2:
        raise ValueError(f"n_points must be >= 2, got {n_points}")

    start_ts = 1700000000 + seed * 10
    timestamps = [start_ts + offset for offset in range(n_points)]

    base = 10.0 + (seed % 5)
    values = [base + random.uniform(-jitter, jitter) for _ in range(n_points)]

    # Avoid accidental all-equal values (compared at 6-decimal precision)
    if len({round(v, 6) for v in values}) == 1:
        values[-1] += 0.001

    return {"timestamps": timestamps, "values": values}

def call_train(call_index: int) -> dict:
    """POST one generated training payload to the fit endpoint and record the outcome.

    The series id is ``benchmark_stress_{run_id}_{call_index}``. The timed span
    covers creating the per-call ``httpx.Client``, the POST itself and closing
    the client.

    Args:
        call_index: Position of this call in the run; also seeds the payload.

    Returns:
        A row dict with keys ``call_index``, ``series_id``, ``status_code``,
        ``ok`` (True only for HTTP 200), ``delta_ms``, ``start_epoch_ms``,
        ``end_epoch_ms``, ``error`` and ``body_preview`` (first 500 characters
        of the response text). The same dict is appended to the module-level
        ``results`` list under ``results_lock``.
    """
    series_id = f"benchmark_stress_{run_id}_{call_index}"
    payload = build_payload(call_index)
    url = BASE_URL + ENDPOINT.format(series_id=series_id)

    response = None
    body = None
    error = None

    t0 = time.perf_counter()
    start_epoch_ms = int(time.time() * 1000)

    try:
        with httpx.Client(timeout=REQUEST_TIMEOUT_SECONDS) as client:
            response = client.post(url, json=payload)
        body = response.text[:500]
    except Exception as exc:
        # Best-effort on purpose: a failed call becomes a row instead of
        # aborting the whole run.
        response = None
        # Keep the exception type: str(exc) alone is empty for exceptions
        # raised without a message, which would hide the failure cause.
        error = f"{type(exc).__name__}: {exc}"

    end_epoch_ms = int(time.time() * 1000)
    delta_ms = (time.perf_counter() - t0) * 1000.0

    row = {
        "call_index": call_index,
        "series_id": series_id,
        "status_code": response.status_code if response is not None else None,
        "ok": response is not None and response.status_code == 200,
        "delta_ms": delta_ms,
        "start_epoch_ms": start_epoch_ms,
        "end_epoch_ms": end_epoch_ms,
        "error": error,
        "body_preview": body,
    }

    with results_lock:
        results.append(row)

    return row

In [19]:
# Run benchmark
# Drop rows left over from an earlier execution of this cell; otherwise
# re-running it without re-creating `results` would mix several runs.
with results_lock:
    results.clear()

wall_start = time.perf_counter()

with ThreadPoolExecutor(max_workers=WORKERS) as executor:
    futures = [executor.submit(call_train, i) for i in range(TOTAL_CALLS)]

    for _future in as_completed(futures):
        # call_train records request failures itself; result() re-raises any
        # other exception from a worker instead of silently discarding it.
        _future.result()

wall_delta_s = time.perf_counter() - wall_start
print(f"Finished {TOTAL_CALLS} calls with {WORKERS} workers in {wall_delta_s:.2f}s")

Finished 100 calls with 100 workers in 4.40s


In [20]:
# Build dataframe + summary
# One row per recorded call, ordered by call index with a fresh 0..n-1 index.
df = (
    pd.DataFrame(results)
    .sort_values(by="call_index")
    .reset_index(drop=True)
)

display(df.head())

# Call counts plus latency statistics (milliseconds) over every recorded call,
# successful or not.
summary = dict(
    total_calls=int(len(df)),
    success_count=int(df["ok"].sum()),
    error_count=int((~df["ok"]).sum()),
    min_ms=float(df["delta_ms"].min()),
    p50_ms=float(df["delta_ms"].quantile(0.50)),
    p95_ms=float(df["delta_ms"].quantile(0.95)),
    max_ms=float(df["delta_ms"].max()),
    mean_ms=float(df["delta_ms"].mean()),
)
summary

Unnamed: 0,call_index,series_id,status_code,ok,delta_ms,start_epoch_ms,end_epoch_ms,error,body_preview
0,0,benchmark_stress_20260218_010149_0,200,True,3935.38689,1771376509100,1771376513036,,"{""series_id"":""benchmark_stress_20260218_010149..."
1,1,benchmark_stress_20260218_010149_1,200,True,3002.489549,1771376509101,1771376512104,,"{""series_id"":""benchmark_stress_20260218_010149..."
2,2,benchmark_stress_20260218_010149_2,200,True,3365.220187,1771376509103,1771376512468,,"{""series_id"":""benchmark_stress_20260218_010149..."
3,3,benchmark_stress_20260218_010149_3,200,True,2401.021757,1771376509104,1771376511505,,"{""series_id"":""benchmark_stress_20260218_010149..."
4,4,benchmark_stress_20260218_010149_4,200,True,3595.020457,1771376509105,1771376512700,,"{""series_id"":""benchmark_stress_20260218_010149..."


{'total_calls': 100,
 'success_count': 100,
 'error_count': 0,
 'min_ms': 2401.021756999853,
 'p50_ms': 3860.7527019998997,
 'p95_ms': 4278.607239799726,
 'max_ms': 4370.657037000001,
 'mean_ms': 3744.7802256099753}

In [21]:
# Latency distribution
# Histogram of per-call latency, 30 bins.
fig_hist = px.histogram(
    data_frame=df,
    title="Training API Latency Distribution",
    x="delta_ms",
    nbins=30,
    labels=dict(delta_ms="Latency (ms)"),
)
fig_hist.show()

# Box plot of the same latencies with every individual call drawn as a point.
fig_box = px.box(
    data_frame=df,
    title="Training API Latency (Box Plot)",
    y="delta_ms",
    points="all",
    labels=dict(delta_ms="Latency (ms)"),
)
fig_box.show()

## Predict Benchmark


In [None]:
# Predict config
# Settings for the predict benchmark: endpoint, concurrency and timeout.
PREDICT_ENDPOINT = "/predict/{series_id}"  # formatted with the trained series id
PREDICT_WORKERS = 100  # max_workers of the thread pool
PREDICT_TOTAL_CALLS = 100  # number of predict requests submitted
PREDICT_TIMEOUT_SECONDS = 30.0  # timeout handed to httpx.Client

# Echo the effective settings so they are stored next to the results.
print(
    dict(
        predict_endpoint=PREDICT_ENDPOINT,
        predict_workers=PREDICT_WORKERS,
        predict_total_calls=PREDICT_TOTAL_CALLS,
        predict_timeout_s=PREDICT_TIMEOUT_SECONDS,
    )
)


In [None]:
# Setup a trained series for predict benchmark
# A fresh series id per execution, tagged with the current UTC time.
predict_run_id = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
predict_series_id = f"benchmark_predict_{predict_run_id}"
fit_url = BASE_URL + ENDPOINT.format(series_id=predict_series_id)

# Fixed 8-point training series: consecutive timestamps, non-constant values.
fit_payload = {
    "timestamps": [1705000000 + i for i in range(8)],
    "values": [10.0, 10.2, 10.1, 9.9, 10.3, 10.15, 9.95, 10.25],
}

with httpx.Client(timeout=PREDICT_TIMEOUT_SECONDS) as client:
    fit_response = client.post(fit_url, json=fit_payload)

print("predict_series_id:", predict_series_id)
print("fit status:", fit_response.status_code)
if fit_response.status_code != 200:
    print("fit body:", fit_response.text[:500])
    # Fail fast: the predict benchmark below targets this series, so running
    # it after a failed fit would only measure error responses.
    raise RuntimeError(
        f"Setup fit for {predict_series_id} failed with status {fit_response.status_code}"
    )


In [None]:
# Predict benchmark workers
# Shared state: row dicts appended by call_predict, and the lock held while appending.
predict_results = list()
predict_lock = threading.Lock()

def call_predict(call_index: int) -> dict:
    """POST one predict request for ``predict_series_id`` and record the outcome.

    The request body carries a stringified timestamp ``1705001000 + call_index``
    and a value drawn uniformly from ``10.0 + [-1.5, 3.0]``; the query string
    carries ``version=0``. The timed span covers creating the per-call
    ``httpx.Client``, the POST itself and closing the client.

    Args:
        call_index: Position of this call in the run; offsets the timestamp.

    Returns:
        A row dict with keys ``call_index``, ``series_id``, ``status_code``,
        ``ok`` (True only for HTTP 200), ``delta_ms``, ``start_epoch_ms``,
        ``end_epoch_ms``, ``error`` and ``body_preview`` (first 500 characters
        of the response text). The same dict is appended to the module-level
        ``predict_results`` list under ``predict_lock``.
    """
    url = BASE_URL + PREDICT_ENDPOINT.format(series_id=predict_series_id)
    payload = {
        "timestamp": str(1705001000 + call_index),
        "value": 10.0 + random.uniform(-1.5, 3.0),
    }

    response = None
    body = None
    error = None

    t0 = time.perf_counter()
    start_epoch_ms = int(time.time() * 1000)

    try:
        with httpx.Client(timeout=PREDICT_TIMEOUT_SECONDS) as client:
            response = client.post(url, params={"version": "0"}, json=payload)
        body = response.text[:500]
    except Exception as exc:
        # Best-effort on purpose: a failed call becomes a row instead of
        # aborting the whole run.
        response = None
        # Keep the exception type: str(exc) alone is empty for exceptions
        # raised without a message, which would hide the failure cause.
        error = f"{type(exc).__name__}: {exc}"

    end_epoch_ms = int(time.time() * 1000)
    delta_ms = (time.perf_counter() - t0) * 1000.0

    row = {
        "call_index": call_index,
        "series_id": predict_series_id,
        "status_code": response.status_code if response is not None else None,
        "ok": response is not None and response.status_code == 200,
        "delta_ms": delta_ms,
        "start_epoch_ms": start_epoch_ms,
        "end_epoch_ms": end_epoch_ms,
        "error": error,
        "body_preview": body,
    }

    with predict_lock:
        predict_results.append(row)

    return row


In [None]:
# Run predict benchmark
# Drop rows left over from an earlier execution of this cell; otherwise
# re-running it without re-creating `predict_results` would mix several runs.
with predict_lock:
    predict_results.clear()

predict_wall_start = time.perf_counter()

with ThreadPoolExecutor(max_workers=PREDICT_WORKERS) as executor:
    predict_futures = [
        executor.submit(call_predict, i) for i in range(PREDICT_TOTAL_CALLS)
    ]

    for _future in as_completed(predict_futures):
        # call_predict records request failures itself; result() re-raises any
        # other exception from a worker instead of silently discarding it.
        _future.result()

predict_wall_delta_s = time.perf_counter() - predict_wall_start
print(f"Finished {PREDICT_TOTAL_CALLS} predict calls with {PREDICT_WORKERS} workers in {predict_wall_delta_s:.2f}s")


In [None]:
# Predict dataframe + summary
# One row per recorded predict call, ordered by call index with a fresh index.
predict_df = (
    pd.DataFrame(predict_results)
    .sort_values(by="call_index")
    .reset_index(drop=True)
)
display(predict_df.head())

# Call counts plus latency statistics (milliseconds) over every recorded call,
# successful or not.
predict_summary = dict(
    total_calls=int(len(predict_df)),
    success_count=int(predict_df["ok"].sum()),
    error_count=int((~predict_df["ok"]).sum()),
    min_ms=float(predict_df["delta_ms"].min()),
    p50_ms=float(predict_df["delta_ms"].quantile(0.50)),
    p95_ms=float(predict_df["delta_ms"].quantile(0.95)),
    max_ms=float(predict_df["delta_ms"].max()),
    mean_ms=float(predict_df["delta_ms"].mean()),
)
predict_summary


In [None]:
# Predict latency plots
# Histogram of per-call predict latency, 30 bins.
fig_predict_hist = px.histogram(
    data_frame=predict_df,
    title="Predict API Latency Distribution",
    x="delta_ms",
    nbins=30,
    labels=dict(delta_ms="Latency (ms)"),
)
fig_predict_hist.show()

# Box plot of the same latencies with every individual call drawn as a point.
fig_predict_box = px.box(
    data_frame=predict_df,
    title="Predict API Latency (Box Plot)",
    y="delta_ms",
    points="all",
    labels=dict(delta_ms="Latency (ms)"),
)
fig_predict_box.show()

