diff --git a/docs/benchmarks/windows-throughput.md b/docs/benchmarks/windows-throughput.md index 02ec50443..c9ff54db1 100644 --- a/docs/benchmarks/windows-throughput.md +++ b/docs/benchmarks/windows-throughput.md @@ -102,6 +102,57 @@ BENCH_RUNS=5 BENCH_WARMUP=2 \ bash scripts/windows_throughput_bench.sh ``` +## Drilldown mode + +Set `OC_RSYNC_BENCH_DRILLDOWN=1` to append three per-hotspot +sub-scenarios to the run. They map 1:1 onto the IOCP sync points +catalogued in +[`docs/audits/iocp-sync-blocking-audit.md`](../audits/iocp-sync-blocking-audit.md) +so that a future patch targeting a specific row in that audit can be +attributed to the matching scenario without re-deriving which hotspot +moved. + +| Scenario | What it isolates | Control | Audit rows | +|-------------------------|----------------------------------------------------------------------------------|--------------------------|------------| +| `write_only_iocp` | `IocpWriter` per-IO blocking drain. `--whole-file --inplace` forces every byte through the write path with no temp-file rename. | `cp` (std::fs::copy) | #1, #4, #13 | +| `read_only_iocp` | `IocpReader` per-IO blocking drain. `--dry-run` walks and reads the 1 GiB fixture but writes nothing. | upstream rsync `--dry-run` | #2, #3 | +| `network_only_loopback` | `IocpSocketWriter` / `Reader` send/recv path. Pushes a 1 GiB file from a local client to a loopback rsync daemon on the same disk so disk bandwidth cancels out. | upstream rsync loopback daemon | #8 - #11 | + +Invocation: + +```sh +# From an MSYS2 shell on a Windows host. +OC_RSYNC_BENCH_DRILLDOWN=1 \ + BENCH_RUNS=5 BENCH_WARMUP=2 \ + OC_RSYNC=/c/path/to/target/release/oc-rsync.exe \ + bash scripts/windows_throughput_bench.sh +``` + +The oc-rsync and upstream drilldown daemons bind `127.0.0.1:$BENCH_DAEMON_PORT` and +`127.0.0.1:$((BENCH_DAEMON_PORT + 1))` respectively (default `18730` / `18731`). +Override `BENCH_DAEMON_PORT` if those ports are in use. 
+ +### Interpretation + +Read each ratio the same way as the main scenarios +(`mean(control) / mean(oc-rsync)`): + +- `write_only_iocp`: regression here points at the write-path changes + in `crates/fast_io/src/iocp/file_writer.rs` and + `crates/fast_io/src/iocp/disk_batch.rs`. The `cp` control caps the + upper bound at NTFS write bandwidth; oc-rsync should land within a + small factor of it. +- `read_only_iocp`: regression here points at `file_reader.rs` or the + generator/sender read pipeline. Because both sides run `--dry-run`, + divergence is not explained by network or fsync work. +- `network_only_loopback`: regression here implicates `socket.rs` or + the multiplex layer. Disk bandwidth is symmetric across both + commands, so the delta reflects send/recv pipelining. + +The drilldown sub-scenarios are **not** in the required-checks list +and have no acceptable-band thresholds; they exist to attribute +movement, not to gate merges. + ## Tuning knobs The reusable workflow exposes these inputs (all optional): diff --git a/scripts/windows_throughput_bench.sh b/scripts/windows_throughput_bench.sh index 976b3f647..5f89ad93f 100755 --- a/scripts/windows_throughput_bench.sh +++ b/scripts/windows_throughput_bench.sh @@ -28,6 +28,26 @@ # BENCH_LARGE_MIB Size of the single large file in MiB (default: 1024) # BENCH_SMALL_COUNT Number of small files (default: 10000) # BENCH_SMALL_KIB Size of each small file in KiB (default: 4) +# +# Drilldown mode (env-gated, OC_RSYNC_BENCH_DRILLDOWN=1): +# Adds three sub-scenarios that isolate the individual IOCP hotspots +# identified in docs/audits/iocp-sync-blocking-audit.md so future Windows +# improvements can be attributed to specific changes: +# - write_only_iocp: forces full-file writes via --whole-file +# --inplace, isolating the IocpWriter per-IO +# blocking drain (audit rows #1, #4, #13). +# Control: native std::fs::copy via `cp`. 
+# - read_only_iocp: runs oc-rsync with --dry-run against the same +# 1 GiB fixture so the file is mapped/read but +# never written, isolating the IocpReader +# blocking drain (audit rows #2, #3). +# - network_only_loopback: pushes a 1 GiB file from a client to an +# oc-rsync daemon over loopback rsync://, isolating the +# IocpSocketWriter / Reader hot paths (audit +# rows #8-#11) from disk completion costs. +# Optional knobs: +# BENCH_DAEMON_PORT Base TCP port: the oc-rsync daemon binds it, the +# upstream control daemon binds port+1 (default: 18730) set -eu @@ -51,6 +71,8 @@ BENCH_RUNS="${BENCH_RUNS:-3}" BENCH_LARGE_MIB="${BENCH_LARGE_MIB:-1024}" BENCH_SMALL_COUNT="${BENCH_SMALL_COUNT:-10000}" BENCH_SMALL_KIB="${BENCH_SMALL_KIB:-4}" +OC_RSYNC_BENCH_DRILLDOWN="${OC_RSYNC_BENCH_DRILLDOWN:-0}" +BENCH_DAEMON_PORT="${BENCH_DAEMON_PORT:-18730}" # Tool checks (skip rather than fail to keep CI green on missing deps). if ! command -v hyperfine >/dev/null 2>&1; then @@ -134,5 +156,186 @@ run_scenario() { run_scenario "large_1gib" "$LARGE_SRC" "$LARGE_DST_OC" "$LARGE_DST_UP" run_scenario "small_10000" "$SMALL_SRC" "$SMALL_DST_OC" "$SMALL_DST_UP" +# ---------------------------------------------------------------------------- +# Drilldown mode: per-hotspot isolation (env-gated). +# +# Each sub-scenario keeps the same hyperfine harness, but swaps the +# command pair to neutralise everything except the hotspot under test. +# See docs/audits/iocp-sync-blocking-audit.md for the mapping from +# scenario name to audit row. +# ---------------------------------------------------------------------------- +if [ "$OC_RSYNC_BENCH_DRILLDOWN" = "1" ]; then + log "drilldown mode: OC_RSYNC_BENCH_DRILLDOWN=1" + + if ! 
command -v cp >/dev/null 2>&1; then + skip "drilldown requires cp (MSYS2 coreutils); not on PATH" + fi + + DRILL_ROOT="$WORKROOT/drilldown" + mkdir -p "$DRILL_ROOT" + + # ------------------------------------------------------------------ + # write_only_iocp + # --whole-file forces a full sender->receiver byte stream (no + # delta), --inplace skips the temp-file + rename so every byte + # lands via IocpWriter. Control is std::fs::copy via `cp`, which + # bypasses oc-rsync entirely and exercises only NTFS write + # bandwidth. The delta between the two is the IOCP write-path + # overhead (audit rows #1, #4, #13). + # The fixture is copied into its own source directory so the + # scenario transfers exactly one file. Each --prepare empties that + # command's destination so every timed run writes the full file. + # NOTE(review): `cp` is the MSYS2 coreutils binary checked above, + # used as the native-copy baseline; it is not Rust's + # std::fs::copy itself. + # ------------------------------------------------------------------ + WRITE_SRC="$DRILL_ROOT/write/src" + WRITE_DST_OC="$DRILL_ROOT/write/dst_oc" + WRITE_DST_CP="$DRILL_ROOT/write/dst_cp" + mkdir -p "$WRITE_SRC" "$WRITE_DST_OC" "$WRITE_DST_CP" + cp "$LARGE_SRC/large.bin" "$WRITE_SRC/large.bin" + + write_only_out="$BENCH_OUT_DIR/write_only_iocp.json" + log "running scenario: write_only_iocp -> $write_only_out" + hyperfine \ + --warmup "$BENCH_WARMUP" \ + --runs "$BENCH_RUNS" \ + --export-json "$write_only_out" \ + --command-name "oc-rsync-write" \ + --prepare "rm -rf '$WRITE_DST_OC'/* '$WRITE_DST_OC'/.[!.]* 2>/dev/null || true" \ + "'$OC_RSYNC' --whole-file --inplace -a '$WRITE_SRC/' '$WRITE_DST_OC/'" \ + --command-name "fs-copy-control" \ + --prepare "rm -rf '$WRITE_DST_CP'/* '$WRITE_DST_CP'/.[!.]* 2>/dev/null || true" \ + "cp '$WRITE_SRC/large.bin' '$WRITE_DST_CP/large.bin'" + + # ------------------------------------------------------------------ + # read_only_iocp + # --dry-run walks and reads the source but never writes the + # destination, isolating IocpReader's per-IO blocking drain + # (audit rows #2, #3). Control is upstream rsync with the same + # flag, so any delta reflects oc-rsync's read-side completion + # handling rather than the dry-run bookkeeping itself. + # No --prepare step is used for this scenario; it relies on + # --dry-run leaving both destinations untouched between runs. + # NOTE(review): confirm that oc-rsync's --dry-run actually reads + # file data; a dry run that only builds the file list would not + # exercise IocpReader at all. 
+    # ------------------------------------------------------------------
+    READ_DST_OC="$DRILL_ROOT/read/dst_oc"
+    READ_DST_UP="$DRILL_ROOT/read/dst_up"
+    mkdir -p "$READ_DST_OC" "$READ_DST_UP"
+
+    read_only_out="$BENCH_OUT_DIR/read_only_iocp.json"
+    log "running scenario: read_only_iocp -> $read_only_out"
+    hyperfine \
+        --warmup "$BENCH_WARMUP" \
+        --runs "$BENCH_RUNS" \
+        --export-json "$read_only_out" \
+        --command-name "oc-rsync-read" \
+        "'$OC_RSYNC' -a --dry-run '$LARGE_SRC/' '$READ_DST_OC/'" \
+        --command-name "upstream-rsync-read" \
+        "'$UPSTREAM_RSYNC' -a --dry-run '$LARGE_SRC/' '$READ_DST_UP/'"
+
+    # ------------------------------------------------------------------
+    # network_only_loopback
+    #   Spawns one short-lived oc-rsync daemon and one upstream rsync
+    #   daemon on adjacent loopback ports and measures a client push
+    #   into each. Source and destination are on the same disk, so
+    #   disk bandwidth is symmetrical; the variable under test is the
+    #   IocpSocket send/recv path (audit rows #8-#11). Control is
+    #   upstream rsync running its own loopback daemon under the same
+    #   shape.
+    # ------------------------------------------------------------------
+    NET_ROOT="$DRILL_ROOT/network"
+    NET_SRC="$NET_ROOT/src"
+    NET_DST_OC="$NET_ROOT/dst_oc"
+    NET_DST_UP="$NET_ROOT/dst_up"
+    NET_CONF_OC="$NET_ROOT/oc-rsyncd.conf"
+    NET_CONF_UP="$NET_ROOT/upstream-rsyncd.conf"
+    NET_PID_OC="$NET_ROOT/oc-rsyncd.pid"
+    NET_PID_UP="$NET_ROOT/upstream-rsyncd.pid"
+    NET_LOG_OC="$NET_ROOT/oc-rsyncd.log"
+    NET_LOG_UP="$NET_ROOT/upstream-rsyncd.log"
+    mkdir -p "$NET_SRC" "$NET_DST_OC" "$NET_DST_UP"
+    cp "$LARGE_SRC/large.bin" "$NET_SRC/large.bin"
+
+    # The oc-rsync daemon takes the base port, the upstream control
+    # daemon the next one up.
+    OC_PORT="$BENCH_DAEMON_PORT"
+    UP_PORT=$(( BENCH_DAEMON_PORT + 1 ))
+
+    # Each daemon gets its own rsyncd.conf: a loopback-only listener on
+    # its port exporting one writable module, [bench], rooted at that
+    # daemon's destination directory. The module name matches the
+    # rsync://127.0.0.1:PORT/bench/ URLs used by the timed commands.
+    # The heredoc delimiter is unquoted on purpose so the port and
+    # path variables expand while the file is written.
+    # NOTE(review): these config bodies were reconstructed from the
+    # variables declared above; confirm the option set against what
+    # oc-rsync's daemon mode accepts.
+    cat >"$NET_CONF_OC" <<EOF
+port = $OC_PORT
+address = 127.0.0.1
+pid file = $NET_PID_OC
+use chroot = false
+[bench]
+    path = $NET_DST_OC
+    read only = false
+EOF
+
+    cat >"$NET_CONF_UP" <<EOF
+port = $UP_PORT
+address = 127.0.0.1
+pid file = $NET_PID_UP
+use chroot = false
+[bench]
+    path = $NET_DST_UP
+    read only = false
+EOF
+
+    # Both daemons stay in the foreground of a background job
+    # (--no-detach ... &) so that $! is the daemon's own PID and its
+    # output lands in the per-daemon log file.
+    log "starting loopback oc-rsync daemon on 127.0.0.1:$OC_PORT"
+    "$OC_RSYNC" --daemon --no-detach --config="$NET_CONF_OC" >"$NET_LOG_OC" 2>&1 &
+    OC_DAEMON_PID=$!
+    log "starting loopback upstream rsync daemon on 127.0.0.1:$UP_PORT"
+    "$UPSTREAM_RSYNC" --daemon --no-detach --config="$NET_CONF_UP" >"$NET_LOG_UP" 2>&1 &
+    UP_DAEMON_PID=$!
+ + stop_daemons() { + if [ -n "${OC_DAEMON_PID:-}" ]; then + kill "$OC_DAEMON_PID" 2>/dev/null || true + wait "$OC_DAEMON_PID" 2>/dev/null || true + fi + if [ -n "${UP_DAEMON_PID:-}" ]; then + kill "$UP_DAEMON_PID" 2>/dev/null || true + wait "$UP_DAEMON_PID" 2>/dev/null || true + fi + } + trap 'stop_daemons; rm -rf "$WORKROOT"' EXIT INT TERM + + # Poll briefly for each daemon's listening port (avoid fixed sleep). + # wait_for_port PORT: tries to connect to 127.0.0.1:PORT up to 50 + # times, sleeping 0.1 s between attempts; returns 0 on the first + # successful connect and 1 if every attempt fails. The probe uses + # bash's /dev/tcp redirection inside a subshell so a refused + # connection does not abort the script under `set -e`. + # NOTE(review): a successful connect only shows that something is + # listening on the port; it does not prove the listener is the + # daemon started by this script. + wait_for_port() { + port="$1" + attempts=0 + while [ "$attempts" -lt 50 ]; do + if (echo >/dev/tcp/127.0.0.1/"$port") >/dev/null 2>&1; then + return 0 + fi + attempts=$(( attempts + 1 )) + sleep 0.1 + done + return 1 + } + # Give up (non-zero exit, daemons stopped) if either daemon never + # becomes reachable; the log message points at that daemon's log. + if ! wait_for_port "$OC_PORT"; then + log "oc-rsync daemon failed to bind 127.0.0.1:$OC_PORT; see $NET_LOG_OC" + stop_daemons + exit 1 + fi + if ! wait_for_port "$UP_PORT"; then + log "upstream rsync daemon failed to bind 127.0.0.1:$UP_PORT; see $NET_LOG_UP" + stop_daemons + exit 1 + fi + + # Timed pair: each client pushes the same source tree to its own + # daemon's bench module. Each --prepare empties the corresponding + # destination directory before every timed run. + network_only_out="$BENCH_OUT_DIR/network_only_loopback.json" + log "running scenario: network_only_loopback -> $network_only_out" + hyperfine \ + --warmup "$BENCH_WARMUP" \ + --runs "$BENCH_RUNS" \ + --export-json "$network_only_out" \ + --command-name "oc-rsync-loopback" \ + --prepare "rm -rf '$NET_DST_OC'/* '$NET_DST_OC'/.[!.]* 2>/dev/null || true" \ + "'$OC_RSYNC' -a '$NET_SRC/' 'rsync://127.0.0.1:$OC_PORT/bench/'" \ + --command-name "upstream-rsync-loopback" \ + --prepare "rm -rf '$NET_DST_UP'/* '$NET_DST_UP'/.[!.]* 2>/dev/null || true" \ + "'$UPSTREAM_RSYNC' -a '$NET_SRC/' 'rsync://127.0.0.1:$UP_PORT/bench/'" + + # The daemons are no longer needed: stop them now and go back to + # an exit handler that only removes $WORKROOT. + stop_daemons + trap 'rm -rf "$WORKROOT"' EXIT INT TERM +fi + log "done. reports in: $BENCH_OUT_DIR" ls -la "$BENCH_OUT_DIR"