diff --git a/README.md b/README.md index d55079b2..f82e3a40 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ Codex CLI-first multi-account OAuth manager for the official `@openai/codex` CLI - Interactive dashboard for account actions and settings - Experimental settings tab for staged sync, backup, and refresh-guard controls - Forecast, report, fix, and doctor commands for operational safety +- Runtime request budget, cooldown, and traffic observability in `codex auth status` / `codex auth report` - Flagged account verification and restore flow - Session affinity and live account sync controls - Proactive refresh and preemptive quota deferral controls @@ -156,6 +157,14 @@ If browser launch is blocked, use the alternate login paths in [docs/getting-sta | `codex auth report --live --json` | How do I get the full machine-readable health report? | | `codex auth fix --live --model gpt-5-codex` | How do I run live repair probes with a chosen model? | +### Reliability behavior + +- whole-pool replay is disabled by default when every account is rate-limited +- active requests use a bounded outbound request budget so one prompt cannot walk the full pool indefinitely +- repeated cross-account 5xx bursts trigger a short cooldown instead of continuing aggressive rotation +- proactive refresh is staggered to reduce background refresh bursts +- `codex auth status` surfaces recent runtime request metrics in text output, and `codex auth report --json` exposes the machine-readable cooldown/runtime snapshot + --- ## Dashboard Hotkeys @@ -190,6 +199,7 @@ If browser launch is blocked, use the alternate login paths in [docs/getting-sta | Accounts | `~/.codex/multi-auth/openai-codex-accounts.json` | | Flagged accounts | `~/.codex/multi-auth/openai-codex-flagged-accounts.json` | | Quota cache | `~/.codex/multi-auth/quota-cache.json` | +| Runtime observability | `~/.codex/multi-auth/runtime-observability.json` | | Logs | `~/.codex/multi-auth/logs/codex-plugin/` | | Per-project 
accounts | `~/.codex/multi-auth/projects/<project>/openai-codex-accounts.json` | @@ -265,6 +275,9 @@ codex auth login - `codex auth` unrecognized: run `where codex`, then follow `docs/troubleshooting.md` for routing fallback commands - Switch succeeds but wrong account appears active: run `codex auth switch <account>`, then restart session +- Requests fail fast with a pool cooldown message: wait for the cooldown window or inspect `codex auth status` +- Requests fail fast after repeated upstream 5xx errors: inspect `codex auth report --json` for runtime traffic and cooldown details +- Storage cleanup fails with `EBUSY` / `EPERM` (Windows): run `codex auth doctor --fix` to retry, or manually remove `~/.codex/multi-auth/<project>/` and re-login - OAuth callback on port `1455` fails: free the port and re-run `codex auth login` - Browser launch is blocked or you are in a headless shell: re-run `codex auth login --manual` or set `CODEX_AUTH_NO_BROWSER=1` - `missing field id_token` / `token_expired` / `refresh_token_reused`: re-login affected account diff --git a/index.ts b/index.ts index e705bbb6..f52a756e 100644 --- a/index.ts +++ b/index.ts @@ -65,6 +65,10 @@ import { getFastSessionMaxInputItems, getFastSessionStrategy, getFetchTimeoutMs, + getRateLimitDedupWindowMs, + getRateLimitMaxBackoffMs, + getRateLimitShortRetryThresholdMs, + getRateLimitStateResetMs, getLiveAccountSync, getLiveAccountSyncDebounceMs, getLiveAccountSyncPollMs, @@ -147,6 +151,19 @@ import { parseEnvInt, parseFailoverMode, } from "./lib/request/failover-config.js"; +import { + capStreamFailoverMax, + computeOutboundRequestAttemptBudget, +} from "./lib/request/request-attempt-budget.js"; +import { + armPoolExhaustionCooldown, + buildAdaptiveStreamFailoverCandidateOrder, + clearPoolExhaustionCooldown, + clearServerBurstCooldown, + getPoolExhaustionCooldownRemaining, + getServerBurstCooldownRemaining, + recordServerBurstFailure, +} from "./lib/request/request-resilience.js"; import { evaluateFailurePolicy, type FailoverMode, @@
-166,8 +183,10 @@ import { transformRequestForCodex, } from "./lib/request/fetch-helpers.js"; import { + configureRateLimitBackoff, getRateLimitBackoff, - RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS, + MAX_SHORT_RETRY_ATTEMPTS, + getRateLimitShortRetryThresholdMs as getConfiguredRateLimitShortRetryThresholdMs, resetRateLimitBackoff, } from "./lib/request/rate-limit-backoff.js"; import { @@ -228,6 +247,7 @@ import { ensureRefreshGuardianState, ensureSessionAffinityState, } from "./lib/runtime/runtime-services.js"; +import { mutateRuntimeObservabilitySnapshot } from "./lib/runtime/runtime-observability.js"; import { applyAccountStorageScopeFromConfig } from "./lib/runtime/storage-scope.js"; import { showRuntimeToast } from "./lib/runtime/toast.js"; import { createRuntimeSessionRecoveryHook } from "./lib/runtime/session-recovery.js"; @@ -309,13 +329,15 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { let startupPrewarmTriggered = false; let lastCodexCliActiveSyncIndex: number | null = null; let perProjectStorageWarningShown = false; - let liveAccountSync: LiveAccountSync | null = null; - let liveAccountSyncPath: string | null = null; +let liveAccountSync: LiveAccountSync | null = null; +let liveAccountSyncPath: string | null = null; +let liveAccountSyncConfigKey: string | null = null; let refreshGuardian: RefreshGuardian | null = null; let refreshGuardianConfigKey: string | null = null; let refreshGuardianCleanupRegistered = false; - let sessionAffinityStore: SessionAffinityStore | null = - new SessionAffinityStore(); +let sessionAffinityStore: SessionAffinityStore | null = + new SessionAffinityStore(); +let sessionAffinityWriteVersion = 0; let sessionAffinityConfigKey: string | null = null; const entitlementCache = new EntitlementCache(); const preemptiveQuotaScheduler = new PreemptiveQuotaScheduler(); @@ -337,22 +359,32 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { type RuntimeMetrics = { startedAt: number; 
- totalRequests: number; - successfulRequests: number; - failedRequests: number; - rateLimitedResponses: number; - serverErrors: number; - networkErrors: number; - userAborts: number; - authRefreshFailures: number; - emptyResponseRetries: number; - accountRotations: number; - sameAccountRetries: number; - streamFailoverAttempts: number; - streamFailoverRecoveries: number; - streamFailoverCrossAccountRecoveries: number; - cumulativeLatencyMs: number; - lastRequestAt: number | null; + totalRequests: number; + successfulRequests: number; + failedRequests: number; + responsesRequests: number; + authRefreshRequests: number; + diagnosticProbeRequests: number; + outboundRequestAttemptBudget: number | null; + outboundRequestAttemptsConsumed: number; + requestAttemptBudgetExhaustions: number; + poolExhaustionFastFails: number; + serverBurstFastFails: number; + rateLimitedResponses: number; + serverErrors: number; + networkErrors: number; + userAborts: number; + authRefreshFailures: number; + emptyResponseRetries: number; + accountRotations: number; + sameAccountRetries: number; + streamFailoverAttempts: number; + streamFailoverCandidatesConsidered: number; + lastStreamFailoverCandidateCount: number; + streamFailoverRecoveries: number; + streamFailoverCrossAccountRecoveries: number; + cumulativeLatencyMs: number; + lastRequestAt: number | null; lastError: string | null; }; @@ -361,6 +393,14 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { totalRequests: 0, successfulRequests: 0, failedRequests: 0, + responsesRequests: 0, + authRefreshRequests: 0, + diagnosticProbeRequests: 0, + outboundRequestAttemptBudget: null, + outboundRequestAttemptsConsumed: 0, + requestAttemptBudgetExhaustions: 0, + poolExhaustionFastFails: 0, + serverBurstFastFails: 0, rateLimitedResponses: 0, serverErrors: 0, networkErrors: 0, @@ -370,6 +410,8 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { accountRotations: 0, sameAccountRetries: 0, 
streamFailoverAttempts: 0, + streamFailoverCandidatesConsidered: 0, + lastStreamFailoverCandidateCount: 0, streamFailoverRecoveries: 0, streamFailoverCrossAccountRecoveries: 0, cumulativeLatencyMs: 0, @@ -399,6 +441,26 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { findMatchingAccountIndex, modelFamilies: MODEL_FAMILIES, }); + const syncRuntimeObservability = (requestId: string | null): void => { + mutateRuntimeObservabilitySnapshot((snapshot) => { + const now = Date.now(); + const poolRemaining = getPoolExhaustionCooldownRemaining(now); + const burstRemaining = getServerBurstCooldownRemaining(now); + snapshot.currentRequestId = requestId; + snapshot.poolExhaustionCooldownUntil = + poolRemaining > 0 + ? now + poolRemaining + : null; + snapshot.serverBurstCooldownUntil = + burstRemaining > 0 + ? now + burstRemaining + : null; + snapshot.responsesRequests = runtimeMetrics.responsesRequests; + snapshot.authRefreshRequests = runtimeMetrics.authRefreshRequests; + snapshot.diagnosticProbeRequests = runtimeMetrics.diagnosticProbeRequests; + snapshot.runtimeMetrics = { ...runtimeMetrics }; + }); + }; const persistAccountPoolAndFlagged = async ( results: TokenSuccessWithAccount[], flaggedStorage: Parameters[0], @@ -481,7 +543,10 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { authFallback, currentSync: liveAccountSync, currentPath: liveAccountSyncPath, + currentConfigKey: liveAccountSyncConfigKey, getLiveAccountSync, + getLiveAccountSyncDebounceMs, + getLiveAccountSyncPollMs, getStoragePath, createSync: (oauthFallback) => new LiveAccountSync( @@ -502,6 +567,7 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { }); liveAccountSync = next.liveAccountSync; liveAccountSyncPath = next.liveAccountSyncPath; + liveAccountSyncConfigKey = next.liveAccountSyncConfigKey; }; const ensureRefreshGuardian = ( pluginConfig: ReturnType, @@ -686,6 +752,13 @@ export const OpenAIOAuthPlugin: Plugin = 
async ({ client }: PluginInput) => { const fastSessionMaxInputItems = getFastSessionMaxInputItems(pluginConfig); const tokenRefreshSkewMs = getTokenRefreshSkewMs(pluginConfig); + configureRateLimitBackoff({ + dedupWindowMs: getRateLimitDedupWindowMs(pluginConfig), + stateResetMs: getRateLimitStateResetMs(pluginConfig), + maxBackoffMs: getRateLimitMaxBackoffMs(pluginConfig), + shortRetryThresholdMs: + getRateLimitShortRetryThresholdMs(pluginConfig), + }); const rateLimitToastDebounceMs = getRateLimitToastDebounceMs(pluginConfig); const retryAllAccountsRateLimited = @@ -713,8 +786,10 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { ); const streamFailoverMax = Math.max( 0, - parseEnvInt(process.env.CODEX_AUTH_STREAM_FAILOVER_MAX) ?? - STREAM_FAILOVER_MAX_BY_MODE[failoverMode], + capStreamFailoverMax( + parseEnvInt(process.env.CODEX_AUTH_STREAM_FAILOVER_MAX) ?? + STREAM_FAILOVER_MAX_BY_MODE[failoverMode], + ), ); const streamFailoverSoftTimeoutMs = Math.max( 1_000, @@ -862,6 +937,8 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { .trim() || undefined; const sessionAffinityKey = threadIdCandidate ?? promptCacheKey ?? null; + const sessionAffinityVersion = + (sessionAffinityWriteVersion += 1); const effectivePromptCacheKey = (sessionAffinityKey ?? promptCacheKey ?? "") .toString() @@ -891,18 +968,96 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { sessionAffinityKey, ); sessionAffinityStore?.prune(); - const requestCorrelationId = setCorrelationId( - threadIdCandidate - ? `${threadIdCandidate}:${Date.now()}` - : undefined, + const requestCorrelationId = setCorrelationId( + threadIdCandidate + ? `${threadIdCandidate}:${Date.now()}` + : undefined, + ); + const requestTraceId = requestCorrelationId; + runtimeMetrics.lastRequestAt = Date.now(); + syncRuntimeObservability(requestTraceId); + + const abortSignal = requestInit?.signal ?? init?.signal ?? 
null; + const sleep = createAbortableSleep(abortSignal); + const poolCooldownRemainingMs = getPoolExhaustionCooldownRemaining(); + if (poolCooldownRemainingMs > 0) { + runtimeMetrics.failedRequests += 1; + runtimeMetrics.poolExhaustionFastFails += 1; + runtimeMetrics.lastError = "Pool exhaustion cooldown active"; + syncRuntimeObservability(requestTraceId); + return new Response( + JSON.stringify({ + error: { + message: `The account pool is cooling down after recent rate-limit exhaustion. Try again in ${formatWaitTime(poolCooldownRemainingMs)} or inspect \`codex auth status\`.`, + }, + }), + { status: 429, headers: { "content-type": "application/json; charset=utf-8" } }, + ); + } + const serverBurstCooldownRemainingMs = getServerBurstCooldownRemaining(); + if (serverBurstCooldownRemainingMs > 0) { + runtimeMetrics.failedRequests += 1; + runtimeMetrics.serverBurstFastFails += 1; + runtimeMetrics.lastError = "Server burst cooldown active"; + syncRuntimeObservability(requestTraceId); + return new Response( + JSON.stringify({ + error: { + message: `Multiple accounts recently failed with upstream server errors. 
Try again in ${formatWaitTime(serverBurstCooldownRemainingMs)} or inspect \`codex auth report --json\`.`, + }, + }), + { status: 503, headers: { "content-type": "application/json; charset=utf-8" } }, ); - runtimeMetrics.lastRequestAt = Date.now(); + } + const maxOutboundRequestAttempts = + computeOutboundRequestAttemptBudget({ + accountCount: accountManager.getAccountCount(), + maxSameAccountRetries, + emptyResponseMaxRetries, + streamFailoverMax, + }); + runtimeMetrics.outboundRequestAttemptBudget ??= + maxOutboundRequestAttempts; + logDebug("Configured outbound request attempt budget.", { + requestTraceId, + budget: maxOutboundRequestAttempts, + accountCount: accountManager.getAccountCount(), + maxSameAccountRetries, + emptyResponseMaxRetries, + streamFailoverMax, + }); + syncRuntimeObservability(requestTraceId); + let outboundRequestAttemptsRemaining = + maxOutboundRequestAttempts; + const tryConsumeOutboundRequestAttempt = ( + reason: "primary" | "stream-failover", + accountIndex: number, + ): boolean => { + if (outboundRequestAttemptsRemaining <= 0) { + runtimeMetrics.requestAttemptBudgetExhaustions += 1; + runtimeMetrics.lastError = + `Request attempt budget exhausted after ${maxOutboundRequestAttempts} outbound request(s)`; + logWarn( + "Request attempt budget exhausted.", + { + requestTraceId, + reason, + accountIndex, + budget: maxOutboundRequestAttempts, + consumed: maxOutboundRequestAttempts, + }, + ); + syncRuntimeObservability(requestTraceId); + return false; + } - const abortSignal = requestInit?.signal ?? init?.signal ?? 
null; - const sleep = createAbortableSleep(abortSignal); + runtimeMetrics.outboundRequestAttemptsConsumed += 1; + outboundRequestAttemptsRemaining -= 1; + return true; + }; - let allRateLimitedRetries = 0; - let emptyResponseRetries = 0; + let allRateLimitedRetries = 0; + let emptyResponseRetries = 0; const attemptedUnsupportedFallbackModels = new Set(); if (model) { attemptedUnsupportedFallbackModels.add(model); @@ -1015,6 +1170,8 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { ) as OAuthAuthDetails; try { if (shouldRefreshToken(accountAuth, tokenRefreshSkewMs)) { + runtimeMetrics.authRefreshRequests += 1; + syncRuntimeObservability(requestTraceId); accountAuth = (await refreshAndUpdateToken( accountAuth, client, @@ -1268,9 +1425,32 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { } let sameAccountRetryCount = 0; + let shortRateLimitRetryCount = 0; let successAccountForResponse = account; let successEntitlementAccountKey = entitlementAccountKey; while (true) { + if ( + !tryConsumeOutboundRequestAttempt( + "primary", + account.index, + ) + ) { + runtimeMetrics.failedRequests++; + const lastErrorDetail = runtimeMetrics.lastError; + const message = lastErrorDetail + ? `${lastErrorDetail}. Try again after the current retries settle.` + : "Request attempt budget exhausted. 
Try again shortly."; + return new Response( + JSON.stringify({ error: { message } }), + { + status: 503, + headers: { + "content-type": "application/json; charset=utf-8", + }, + }, + ); + } + let response: Response; const fetchStart = performance.now(); @@ -1305,6 +1485,8 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { try { runtimeMetrics.totalRequests++; + runtimeMetrics.responsesRequests++; + syncRuntimeObservability(requestTraceId); response = await fetch( url, applyProxyCompatibleInit(url, { @@ -1801,16 +1983,40 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { const serverRetryAfterMs = parseRetryAfterHintMs( response.headers, ); - const policy = evaluateFailurePolicy( - { - kind: "server", - failoverMode, - serverRetryAfterMs: - serverRetryAfterMs ?? undefined, - }, - { serverCooldownMs: serverErrorCooldownMs }, - ); - if (policy.refundToken) { + const policy = evaluateFailurePolicy( + { + kind: "server", + failoverMode, + serverRetryAfterMs: + serverRetryAfterMs ?? undefined, + }, + { serverCooldownMs: serverErrorCooldownMs }, + ); + const serverBurstCooldownUntil = recordServerBurstFailure( + account.index, + ); + if (serverBurstCooldownUntil > 0) { + runtimeMetrics.lastError = + "Repeated upstream server errors across the account pool"; + syncRuntimeObservability(requestTraceId); + } + // Overload-type server errors (502 Bad Gateway, 503 Service + // Unavailable, 529 Overloaded) signal upstream capacity + // pressure. Notify the quota scheduler so it can proactively + // defer subsequent requests for this quota key, mirroring the + // 429 handler's scheduler awareness. 
+ if ( + (response.status === 502 || + response.status === 503 || + response.status === 529) && + typeof policy.cooldownMs === "number" + ) { + preemptiveQuotaScheduler.markRateLimited( + quotaScheduleKey, + policy.cooldownMs, + ); + } + if (policy.refundToken) { accountManager.refundToken( account, modelFamily, @@ -1858,14 +2064,15 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { break; } - if (response.status === 429) { + if (errorResponse.status === 429 && rateLimit) { runtimeMetrics.rateLimitedResponses++; const retryAfterMs = rateLimit?.retryAfterMs ?? 60_000; - const { attempt, delayMs } = getRateLimitBackoff( + const { delayMs } = getRateLimitBackoff( account.index, quotaKey, retryAfterMs, + account.accountId ?? account.email ?? null, ); const cooldownMs = Math.max( delayMs, @@ -1876,16 +2083,24 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { cooldownMs, ); const waitLabel = formatWaitTime(cooldownMs); + const shortRetryThresholdMs = + getConfiguredRateLimitShortRetryThresholdMs(); - if (cooldownMs <= RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS) { + if ( + cooldownMs <= + shortRetryThresholdMs && + shortRateLimitRetryCount < MAX_SHORT_RETRY_ATTEMPTS + ) { + shortRateLimitRetryCount += 1; if ( accountManager.shouldShowAccountToast( account.index, rateLimitToastDebounceMs, ) ) { - await showRuntimeToast(client, - `Rate limited. Retrying in ${waitLabel} (attempt ${attempt})...`, + await showRuntimeToast( + client, + `Rate limited. 
Retrying in ${waitLabel} (attempt ${shortRateLimitRetryCount})...`, "warning", { duration: toastDurationMs }, ); @@ -1955,13 +2170,31 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { runtimeMetrics.cumulativeLatencyMs += fetchLatencyMs; let responseForSuccess = response; if (isStreaming) { - const streamFallbackCandidateOrder = [ - account.index, - ...accountManager - .getAccountsSnapshot() - .map((candidate) => candidate.index) - .filter((index) => index !== account.index), - ]; + const streamFallbackCandidateOrder = + buildAdaptiveStreamFailoverCandidateOrder( + account.index, + accountSnapshotList as Array< + Pick< + import("./lib/accounts.js").ManagedAccount, + "index" | "lastUsed" | "enabled" | "coolingDownUntil" | "rateLimitResetTimes" + > + >, + ); + runtimeMetrics.lastStreamFailoverCandidateCount = + streamFallbackCandidateOrder.length; + runtimeMetrics.streamFailoverCandidatesConsidered += + streamFallbackCandidateOrder.length; + logDebug( + "Prepared stream failover candidates.", + { + requestTraceId, + primaryAccountIndex: account.index, + candidateCount: + streamFallbackCandidateOrder.length, + candidateIndices: streamFallbackCandidateOrder, + }, + ); + syncRuntimeObservability(requestTraceId); responseForSuccess = withStreamingFailover( response, async (failoverAttempt, emittedBytes) => { @@ -1994,13 +2227,15 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { fallbackAccount, ) as OAuthAuthDetails; try { - if ( - shouldRefreshToken( - fallbackAuth, - tokenRefreshSkewMs, - ) - ) { - fallbackAuth = (await refreshAndUpdateToken( + if ( + shouldRefreshToken( + fallbackAuth, + tokenRefreshSkewMs, + ) + ) { + runtimeMetrics.authRefreshRequests += 1; + syncRuntimeObservability(requestTraceId); + fallbackAuth = (await refreshAndUpdateToken( fallbackAuth, client, )) as OAuthAuthDetails; @@ -2069,6 +2304,14 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { continue; } + 
if ( + !tryConsumeOutboundRequestAttempt( + "stream-failover", + fallbackAccount.index, + ) + ) { + return null; + } if ( !accountManager.consumeToken( fallbackAccount, @@ -2128,6 +2371,8 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { try { runtimeMetrics.totalRequests++; + runtimeMetrics.responsesRequests++; + syncRuntimeObservability(requestTraceId); const fallbackResponse = await fetch( url, applyProxyCompatibleInit(url, { @@ -2207,12 +2452,14 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { if (fallbackAccount.index !== account.index) { runtimeMetrics.streamFailoverCrossAccountRecoveries += 1; runtimeMetrics.accountRotations += 1; - if (!responseContinuationEnabled) { - sessionAffinityStore?.remember( - sessionAffinityKey, - fallbackAccount.index, - ); - } + if (!responseContinuationEnabled) { + sessionAffinityStore?.rememberWithVersion( + sessionAffinityKey, + fallbackAccount.index, + Date.now(), + sessionAffinityVersion, + ); + } } logInfo( @@ -2273,15 +2520,19 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { isStreaming, { onResponseId: (responseId) => { - if (!responseContinuationEnabled) return; - sessionAffinityStore?.remember( - sessionAffinityKey, - successAccountForResponse.index, - ); - sessionAffinityStore?.updateLastResponseId( - sessionAffinityKey, - responseId, - ); + if (!responseContinuationEnabled) return; + sessionAffinityStore?.rememberWithVersion( + sessionAffinityKey, + successAccountForResponse.index, + Date.now(), + sessionAffinityVersion, + ); + sessionAffinityStore?.updateLastResponseId( + sessionAffinityKey, + responseId, + Date.now(), + sessionAffinityVersion, + ); storedResponseIdForSuccess = true; }, streamStallTimeoutMs, @@ -2353,6 +2604,24 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { ); break; } + if (response.status >= 500) { + const serverBurstCooldownUntil = + 
recordServerBurstFailure(account.index); + if (serverBurstCooldownUntil > 0) { + runtimeMetrics.serverBurstFastFails += 1; + runtimeMetrics.lastError = + "Repeated upstream server errors across the account pool"; + syncRuntimeObservability(requestTraceId); + return new Response( + JSON.stringify({ + error: { + message: `Upstream server failures were observed across multiple accounts. Pausing retries for ${formatWaitTime(serverBurstCooldownUntil - Date.now())}. Check \`codex auth report --json\` for runtime metrics.`, + }, + }), + { status: 503, headers: { "content-type": "application/json; charset=utf-8" } }, + ); + } + } logWarn( `Empty response after ${emptyResponseMaxRetries} retries. Returning as-is.`, ); @@ -2387,9 +2656,11 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { !responseContinuationEnabled || (!isStreaming && !storedResponseIdForSuccess) ) { - sessionAffinityStore?.remember( + sessionAffinityStore?.rememberWithVersion( sessionAffinityKey, successAccountForResponse.index, + Date.now(), + sessionAffinityVersion, ); } runtimeMetrics.successfulRequests++; @@ -2404,6 +2675,9 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { lastCodexCliActiveSyncIndex = successAccountForResponse.index; } + clearPoolExhaustionCooldown(); + clearServerBurstCooldown(); + syncRuntimeObservability(requestTraceId); return successResponse; } if (retryNextAccountBeforeFallback) { @@ -2449,26 +2723,39 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => { continue; } - const waitLabel = - waitMs > 0 ? formatWaitTime(waitMs) : "a bit"; - const message = - count === 0 - ? "No Codex accounts configured. Run `codex login`." - : waitMs > 0 - ? `All ${count} account(s) are rate-limited. Try again in ${waitLabel} or add another account with \`codex login\`.` - : `All ${count} account(s) failed (server errors or auth issues). 
Check account health with \`codex-health\`.`; - runtimeMetrics.failedRequests++; - runtimeMetrics.lastError = message; - return new Response(JSON.stringify({ error: { message } }), { - status: waitMs > 0 ? 429 : 503, + const now = Date.now(); + const poolCooldownUntil = + count > 0 && waitMs > 0 + ? armPoolExhaustionCooldown(waitMs, now) + : 0; + const effectiveWaitMs = + poolCooldownUntil > 0 + ? Math.max(0, poolCooldownUntil - now) + : waitMs; + const waitLabel = + effectiveWaitMs > 0 + ? formatWaitTime(effectiveWaitMs) + : "a bit"; + const message = + count === 0 + ? "No Codex accounts configured. Run `codex login`." + : effectiveWaitMs > 0 + ? `All ${count} account(s) are rate-limited. A short pool cooldown is now active for ${waitLabel}. Try again later or inspect \`codex auth status\`.` + : `All ${count} account(s) failed (server errors or auth issues). Check account health with \`codex auth report --json\`.`; + runtimeMetrics.failedRequests++; + runtimeMetrics.lastError = message; + syncRuntimeObservability(requestTraceId); + return new Response(JSON.stringify({ error: { message } }), { + status: effectiveWaitMs > 0 ? 
429 : 503, headers: { "content-type": "application/json; charset=utf-8", }, }); } - } finally { - clearCorrelationId(); - } + } finally { + syncRuntimeObservability(null); + clearCorrelationId(); + } }, }; } finally { diff --git a/lib/accounts.ts b/lib/accounts.ts index 9b0e2dd2..9d41d401 100644 --- a/lib/accounts.ts +++ b/lib/accounts.ts @@ -535,6 +535,7 @@ export class AccountManager { const account = this.getAccountByIndex(index); if (!account) return false; if (account.enabled === false) return false; + if (!this.hasEnabledWorkspaces(account)) return false; clearExpiredRateLimits(account); return ( !isRateLimitedForFamily(account, family, model) && @@ -606,6 +607,7 @@ export class AccountManager { const account = this.accounts[idx]; if (!account) continue; if (account.enabled === false) continue; + if (!this.hasEnabledWorkspaces(account)) continue; clearExpiredRateLimits(account); if ( @@ -636,6 +638,7 @@ export class AccountManager { const account = this.accounts[idx]; if (!account) continue; if (account.enabled === false) continue; + if (!this.hasEnabledWorkspaces(account)) continue; clearExpiredRateLimits(account); if ( @@ -666,6 +669,7 @@ export class AccountManager { .map((account): AccountWithMetrics | null => { if (!account) return null; if (account.enabled === false) return null; + if (!this.hasEnabledWorkspaces(account)) return null; clearExpiredRateLimits(account); const isAvailable = !isRateLimitedForFamily(account, family, model) && diff --git a/lib/auto-update-checker.ts b/lib/auto-update-checker.ts index a948c60e..2d5acc4e 100644 --- a/lib/auto-update-checker.ts +++ b/lib/auto-update-checker.ts @@ -1,4 +1,4 @@ -import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs"; +import { readFileSync, writeFileSync, existsSync, mkdirSync, renameSync, unlinkSync } from "node:fs"; import { join } from "node:path"; import { createLogger } from "./logger.js"; import { getCodexCacheDir } from "./runtime-paths.js"; @@ -28,6 +28,53 @@ interface 
ParsedSemver { } const RETRYABLE_WRITE_ERRORS = new Set(["EBUSY", "EPERM"]); +let updateCacheWriteQueue: Promise = Promise.resolve(); + +function enqueueUpdateCacheWrite(writeTask: () => void): Promise { + const queued = updateCacheWriteQueue.catch(() => undefined).then(writeTask); + updateCacheWriteQueue = queued.then( + () => undefined, + () => undefined, + ); + return queued; +} + +function writeCacheContents(serialized: string): void { + let tempPath: string | null = null; + let wroteTemp = false; + try { + if (!existsSync(CACHE_DIR)) { + mkdirSync(CACHE_DIR, { recursive: true }); + } + tempPath = `${CACHE_FILE}.${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2, 8)}.tmp`; + let lastError: Error | null = null; + for (let attempt = 0; attempt < 4; attempt++) { + try { + writeFileSync(tempPath, serialized, "utf8"); + renameSync(tempPath, CACHE_FILE); + wroteTemp = false; + return; + } catch (error) { + const code = (error as NodeJS.ErrnoException).code ?? ""; + lastError = error as Error; + wroteTemp = true; + if (!RETRYABLE_WRITE_ERRORS.has(code) || attempt >= 3) { + throw error; + } + sleepSync(15 * (2 ** attempt)); + } + } + if (lastError) throw lastError; + } finally { + if (wroteTemp && tempPath) { + try { + unlinkSync(tempPath); + } catch { + // Best effort temp cleanup. + } + } + } +} function sleepSync(ms: number): void { const delay = Math.max(0, Math.floor(ms)); @@ -41,7 +88,10 @@ function getCurrentVersion(): string { const packageJsonPath = join(import.meta.dirname ?? __dirname, "..", "package.json"); const packageJson = JSON.parse(readFileSync(packageJsonPath, "utf8")) as { version: string }; return packageJson.version; - } catch { + } catch (error) { + log.debug("Failed to read current package version", { + error: error instanceof Error ? 
error.message : String(error), + }); return "0.0.0"; } } @@ -51,35 +101,22 @@ function loadCache(): UpdateCheckCache | null { if (!existsSync(CACHE_FILE)) return null; const content = readFileSync(CACHE_FILE, "utf8"); return JSON.parse(content) as UpdateCheckCache; - } catch { + } catch (error) { + log.debug("Failed to load update cache", { + error: error instanceof Error ? error.message : String(error), + }); return null; } } -function saveCache(cache: UpdateCheckCache): void { - try { - if (!existsSync(CACHE_DIR)) { - mkdirSync(CACHE_DIR, { recursive: true }); - } - const serialized = JSON.stringify(cache, null, 2); - let lastError: Error | null = null; - for (let attempt = 0; attempt < 4; attempt++) { - try { - writeFileSync(CACHE_FILE, serialized, "utf8"); - return; - } catch (error) { - const code = (error as NodeJS.ErrnoException).code ?? ""; - lastError = error as Error; - if (!RETRYABLE_WRITE_ERRORS.has(code) || attempt >= 3) { - throw error; - } - sleepSync(15 * (2 ** attempt)); - } - } - if (lastError) throw lastError; - } catch (error) { - log.warn("Failed to save update cache", { error: (error as Error).message }); - } +async function saveCache(cache: UpdateCheckCache): Promise { + await enqueueUpdateCacheWrite(() => { + try { + writeCacheContents(JSON.stringify(cache, null, 2)); + } catch (error) { + log.warn("Failed to save update cache", { error: (error as Error).message }); + } + }); } function parseSemver(version: string): ParsedSemver { @@ -211,11 +248,11 @@ export async function checkForUpdates(force = false): Promise const latestVersion = await fetchLatestVersion(); - saveCache({ - lastCheck: now, - latestVersion, - currentVersion, - }); + await saveCache({ + lastCheck: now, + latestVersion, + currentVersion, + }); const hasUpdate = latestVersion ? 
compareVersions(currentVersion, latestVersion) > 0 : false; @@ -250,11 +287,13 @@ export async function checkAndNotify( } export function clearUpdateCache(): void { - try { - if (existsSync(CACHE_FILE)) { - writeFileSync(CACHE_FILE, "{}", "utf8"); - } - } catch { - // Ignore errors - } + void enqueueUpdateCacheWrite(() => { + try { + if (existsSync(CACHE_FILE)) { + writeCacheContents("{}"); + } + } catch { + // Ignore errors + } + }); } diff --git a/lib/codex-manager.ts b/lib/codex-manager.ts index 3e917640..2b164c90 100644 --- a/lib/codex-manager.ts +++ b/lib/codex-manager.ts @@ -50,6 +50,7 @@ import { runFeaturesCommand, runStatusCommand, } from "./codex-manager/commands/status.js"; +import { loadPersistedRuntimeObservabilitySnapshot } from "./runtime/runtime-observability.js"; import { runSwitchCommand } from "./codex-manager/commands/switch.js"; import { parseAuthLoginArgs, printUsage } from "./codex-manager/help.js"; import { @@ -80,6 +81,7 @@ import { } from "./request/helpers/model-map.js"; import { formatRateLimitEntry as formatAccountRateLimitEntry, + getRateLimitResetTimeForFamily, resolveActiveIndex, } from "./runtime/account-status.js"; import { @@ -100,6 +102,7 @@ import { clearAccounts, findMatchingAccountIndex, formatStorageErrorHint, + inspectStorageHealth, getLastAccountsSaveTimestamp, getNamedBackups, getStoragePath, @@ -644,6 +647,41 @@ function getQuotaCacheEntryForAccount( return null; } +function getPersistedQuotaViewForAccount( + cache: QuotaCacheData | null, + account: Pick, + accounts: readonly Pick[], + now: number, + emailFallbackState = buildQuotaEmailFallbackState(accounts), +): QuotaCacheEntry | null { + const cachedEntry = cache + ? getQuotaCacheEntryForAccount(cache, account, accounts, emailFallbackState) + : null; + const persistedResetAt = getRateLimitResetTimeForFamily(account, now, "codex"); + if (typeof persistedResetAt !== "number") { + return cachedEntry; + } + const cachedPrimaryResetAt = cachedEntry?.primary.resetAtMs ?? 
0; + const cachedSecondaryResetAt = cachedEntry?.secondary.resetAtMs ?? 0; + if ( + cachedEntry?.status === 429 && + Math.max(cachedPrimaryResetAt, cachedSecondaryResetAt) >= persistedResetAt + ) { + return cachedEntry; + } + return { + updatedAt: cachedEntry?.updatedAt ?? now, + status: 429, + model: cachedEntry?.model ?? "gpt-5-codex", + planType: cachedEntry?.planType, + primary: { + ...cachedEntry?.primary, + resetAtMs: Math.max(cachedPrimaryResetAt, persistedResetAt), + }, + secondary: cachedEntry?.secondary ?? {}, + }; +} + function updateQuotaCacheForAccount( cache: QuotaCacheData, account: Pick, @@ -742,10 +780,11 @@ function resolveMenuQuotaProbeInput( if (account.enabled === false) return null; if (!hasUsableAccessToken(account, now)) return null; - const existing = getQuotaCacheEntryForAccount( + const existing = getPersistedQuotaViewForAccount( cache, account, accounts, + now, emailFallbackState, ); if ( @@ -908,6 +947,7 @@ function mapAccountStatus( index: number, activeIndex: number, now: number, + persistedQuotaStatus?: number, ): ExistingAccountInfo["status"] { if (account.enabled === false) return "disabled"; if ( @@ -916,6 +956,7 @@ function mapAccountStatus( ) { return "cooldown"; } + if (persistedQuotaStatus === 429) return "rate-limited"; const rateLimit = formatRateLimitEntry(account, now, "codex"); if (rateLimit) return "rate-limited"; if (index === activeIndex) return "active"; @@ -1061,14 +1102,13 @@ function toExistingAccountInfo( const layoutMode = resolveMenuLayoutMode(displaySettings); const emailFallbackState = buildQuotaEmailFallbackState(storage.accounts); const baseAccounts = storage.accounts.map((account, index) => { - const entry = quotaCache - ? 
getQuotaCacheEntryForAccount( - quotaCache, - account, - storage.accounts, - emailFallbackState, - ) - : null; + const entry = getPersistedQuotaViewForAccount( + quotaCache, + account, + storage.accounts, + now, + emailFallbackState, + ); return { index, sourceIndex: index, @@ -1077,7 +1117,7 @@ function toExistingAccountInfo( email: account.email, addedAt: account.addedAt, lastUsed: account.lastUsed, - status: mapAccountStatus(account, index, activeIndex, now), + status: mapAccountStatus(account, index, activeIndex, now, entry?.status), quotaSummary: (displaySettings.menuShowQuotaSummary ?? true) && entry ? formatAccountQuotaSummary(entry) @@ -3203,8 +3243,10 @@ export async function runCodexMultiAuthCli(rawArgs: string[]): Promise { setStoragePath, getStoragePath, loadAccounts, + inspectStorageHealth, resolveActiveIndex, formatRateLimitEntry, + loadRuntimeObservabilitySnapshot: loadPersistedRuntimeObservabilitySnapshot, }); } if (command === "switch") { @@ -3234,6 +3276,7 @@ export async function runCodexMultiAuthCli(rawArgs: string[]): Promise { setStoragePath, getStoragePath, loadAccounts, + inspectStorageHealth, saveAccounts, resolveActiveIndex, hasUsableAccessToken, @@ -3241,6 +3284,7 @@ export async function runCodexMultiAuthCli(rawArgs: string[]): Promise { fetchCodexQuotaSnapshot, formatRateLimitEntry, normalizeFailureDetail, + loadRuntimeObservabilitySnapshot: loadPersistedRuntimeObservabilitySnapshot, }); } if (command === "fix") { diff --git a/lib/codex-manager/commands/forecast.ts b/lib/codex-manager/commands/forecast.ts index f72e5707..3ed9f5d1 100644 --- a/lib/codex-manager/commands/forecast.ts +++ b/lib/codex-manager/commands/forecast.ts @@ -4,16 +4,18 @@ import { buildForecastExplanation, type ForecastAccountResult, } from "../../forecast.js"; +import { + applyRefreshedAccountPatch, + persistRefreshedAccountPatch, + serializeForecastResults, + type AccountIdentityMatch, + type RefreshedAccountPatch, +} from "../forecast-report-shared.js"; import 
type { QuotaCacheData } from "../../quota-cache.js"; import type { CodexQuotaSnapshot } from "../../quota-probe.js"; import { resolveNormalizedModel } from "../../request/helpers/model-map.js"; -import { - findMatchingAccountIndex, - type AccountMetadataV3, - type AccountStorageV3, -} from "../../storage.js"; +import { type AccountMetadataV3, type AccountStorageV3 } from "../../storage.js"; import type { TokenFailure, TokenResult } from "../../types.js"; -import { sleep } from "../../utils.js"; interface ForecastCliOptions { live: boolean; @@ -32,8 +34,6 @@ type QuotaEmailFallbackState = ReadonlyMap< { matchingCount: number; distinctAccountIds: Set } >; -const RETRYABLE_STORAGE_WRITE_CODES = new Set(["EBUSY", "EPERM"]); - export interface ForecastCommandDeps { setStoragePath: (path: string | null) => void; loadAccounts: () => Promise; @@ -112,82 +112,6 @@ export interface ForecastCommandDeps { getNow?: () => number; } -function isRetryableStorageWriteError(error: unknown): boolean { - const code = (error as NodeJS.ErrnoException | undefined)?.code; - return typeof code === "string" && RETRYABLE_STORAGE_WRITE_CODES.has(code); -} - -async function saveAccountsWithRetry( - storage: AccountStorageV3, - saveAccounts: ForecastCommandDeps["saveAccounts"], -): Promise { - for (let attempt = 0; ; attempt += 1) { - try { - await saveAccounts(storage); - return; - } catch (error) { - if (!isRetryableStorageWriteError(error) || attempt >= 3) { - throw error; - } - await sleep(10 * 2 ** attempt); - } - } -} - -type AccountIdentityMatch = Pick< - AccountMetadataV3, - "accountId" | "email" | "refreshToken" ->; -type RefreshedAccountPatch = Pick< - AccountMetadataV3, - "refreshToken" | "accessToken" | "expiresAt" -> & { - email?: AccountMetadataV3["email"]; - accountId?: AccountMetadataV3["accountId"]; - accountIdSource?: AccountMetadataV3["accountIdSource"]; -}; - -function applyRefreshedAccountPatch( - account: AccountMetadataV3, - patch: RefreshedAccountPatch, -): void { - 
account.refreshToken = patch.refreshToken; - account.accessToken = patch.accessToken; - account.expiresAt = patch.expiresAt; - if (patch.email) account.email = patch.email; - if (patch.accountId) { - account.accountId = patch.accountId; - account.accountIdSource = patch.accountIdSource; - } -} - -async function persistRefreshedAccountPatch( - storage: AccountStorageV3, - accountMatch: AccountIdentityMatch, - patch: RefreshedAccountPatch, - loadAccounts: ForecastCommandDeps["loadAccounts"], - saveAccounts: ForecastCommandDeps["saveAccounts"], -): Promise { - const latestStorage = (await loadAccounts()) ?? storage; - const nextStorage = structuredClone(latestStorage); - const targetIndex = - findMatchingAccountIndex(nextStorage.accounts, accountMatch, { - allowUniqueAccountIdFallbackWithoutEmail: true, - }) ?? - findMatchingAccountIndex(nextStorage.accounts, patch, { - allowUniqueAccountIdFallbackWithoutEmail: true, - }); - if (targetIndex === undefined) { - throw new Error("Unable to resolve refreshed account for persistence"); - } - const targetAccount = nextStorage.accounts[targetIndex]; - if (!targetAccount) { - throw new Error("Unable to resolve refreshed account for persistence"); - } - applyRefreshedAccountPatch(targetAccount, patch); - await saveAccountsWithRetry(nextStorage, saveAccounts); -} - function joinStyledSegments( parts: string[], styleText: (text: string, tone: PromptTone) => string, @@ -255,54 +179,6 @@ function parseForecastArgs( return { ok: true, options }; } -function serializeForecastResults( - results: ForecastAccountResult[], - liveQuotaByIndex: Map, - refreshFailures: Map, - formatQuotaSnapshotLine: (snapshot: CodexQuotaSnapshot) => string, -): Array<{ - index: number; - label: string; - isCurrent: boolean; - availability: ForecastAccountResult["availability"]; - riskScore: number; - riskLevel: ForecastAccountResult["riskLevel"]; - waitMs: number; - reasons: string[]; - liveQuota?: { - status: number; - planType?: string; - activeLimit?: 
number; - model: string; - summary: string; - }; - refreshFailure?: TokenFailure; -}> { - return results.map((result) => { - const liveQuota = liveQuotaByIndex.get(result.index); - return { - index: result.index, - label: result.label, - isCurrent: result.isCurrent, - availability: result.availability, - riskScore: result.riskScore, - riskLevel: result.riskLevel, - waitMs: result.waitMs, - reasons: result.reasons, - liveQuota: liveQuota - ? { - status: liveQuota.status, - planType: liveQuota.planType, - activeLimit: liveQuota.activeLimit, - model: liveQuota.model, - summary: formatQuotaSnapshotLine(liveQuota), - } - : undefined, - refreshFailure: refreshFailures.get(result.index), - }; - }); -} - export async function runForecastCommand( args: string[], deps: ForecastCommandDeps & { diff --git a/lib/codex-manager/commands/report.ts b/lib/codex-manager/commands/report.ts index a77a2c84..7b557384 100644 --- a/lib/codex-manager/commands/report.ts +++ b/lib/codex-manager/commands/report.ts @@ -8,10 +8,16 @@ import { } from "../../accounts.js"; import { evaluateForecastAccounts, - type ForecastAccountResult, recommendForecastAccount, summarizeForecast, } from "../../forecast.js"; +import { + type AccountIdentityMatch, + applyRefreshedAccountPatch, + persistRefreshedAccountPatch, + type RefreshedAccountPatch, + serializeForecastResults, +} from "../forecast-report-shared.js"; import { type CodexQuotaSnapshot, formatQuotaSnapshotLine, @@ -23,10 +29,11 @@ import { resolveNormalizedModel, } from "../../request/helpers/model-map.js"; import { - findMatchingAccountIndex, type AccountMetadataV3, type AccountStorageV3, + type StorageHealthSummary, } from "../../storage.js"; +import type { RuntimeObservabilitySnapshot } from "../../runtime/runtime-observability.js"; import type { TokenFailure, TokenResult } from "../../types.js"; import { sleep } from "../../utils.js"; @@ -35,6 +42,9 @@ interface ReportCliOptions { json: boolean; explain: boolean; model: string; + maxAccounts?: 
number; + maxProbes?: number; + cachedOnly: boolean; outPath?: string; } @@ -50,6 +60,20 @@ interface ModelInspection { capabilities: ReturnType; } +function parsePositiveIntegerOption( + rawValue: string, +): number | null { + const normalized = rawValue.trim(); + if (!/^\d+$/.test(normalized)) { + return null; + } + const parsed = Number.parseInt(normalized, 10); + if (!Number.isSafeInteger(parsed) || parsed < 1) { + return null; + } + return parsed; +} + const RETRYABLE_WRITE_CODES = new Set(["EBUSY", "EPERM"]); export interface ReportCommandDeps { @@ -73,6 +97,7 @@ export interface ReportCommandDeps { now: number, family: "codex", ) => string | null; + inspectStorageHealth?: () => Promise; normalizeFailureDetail: ( message: string | undefined, reason: string | undefined, @@ -82,6 +107,7 @@ export interface ReportCommandDeps { getNow?: () => number; getCwd?: () => string; writeFile?: (path: string, contents: string) => Promise; + loadRuntimeObservabilitySnapshot?: () => Promise; } function isRetryableWriteError(error: unknown): boolean { @@ -99,6 +125,9 @@ function printReportUsage(logInfo: (message: string) => void): void { " --json, -j Print machine-readable JSON output", " --explain Print per-account reasoning in text mode", " --model, -m Probe model for live mode (default: gpt-5-codex)", + " --max-accounts N Limit how many enabled accounts live mode can consider", + " --max-probes N Limit how many live quota probes can run", + " --cached-only Skip refreshes and only use already-usable access tokens", " --out Write JSON report to a file path", ].join("\n"), ); @@ -110,6 +139,7 @@ function parseReportArgs(args: string[]): ParsedArgsResult { json: false, explain: false, model: "gpt-5-codex", + cachedOnly: false, }; for (let i = 0; i < args.length; i += 1) { @@ -127,6 +157,10 @@ function parseReportArgs(args: string[]): ParsedArgsResult { options.explain = true; continue; } + if (arg === "--cached-only") { + options.cachedOnly = true; + continue; + } if (arg === 
"--model" || arg === "-m") { const value = args[i + 1]; if (!value) return { ok: false, message: "Missing value for --model" }; @@ -140,6 +174,44 @@ function parseReportArgs(args: string[]): ParsedArgsResult { options.model = value; continue; } + if (arg === "--max-accounts") { + const value = args[i + 1]; + if (!value) return { ok: false, message: "Missing value for --max-accounts" }; + const parsed = parsePositiveIntegerOption(value); + if (parsed === null) { + return { ok: false, message: "--max-accounts must be a positive integer" }; + } + options.maxAccounts = parsed; + i += 1; + continue; + } + if (arg.startsWith("--max-accounts=")) { + const parsed = parsePositiveIntegerOption(arg.slice("--max-accounts=".length)); + if (parsed === null) { + return { ok: false, message: "--max-accounts must be a positive integer" }; + } + options.maxAccounts = parsed; + continue; + } + if (arg === "--max-probes") { + const value = args[i + 1]; + if (!value) return { ok: false, message: "Missing value for --max-probes" }; + const parsed = parsePositiveIntegerOption(value); + if (parsed === null) { + return { ok: false, message: "--max-probes must be a positive integer" }; + } + options.maxProbes = parsed; + i += 1; + continue; + } + if (arg.startsWith("--max-probes=")) { + const parsed = parsePositiveIntegerOption(arg.slice("--max-probes=".length)); + if (parsed === null) { + return { ok: false, message: "--max-probes must be a positive integer" }; + } + options.maxProbes = parsed; + continue; + } if (arg === "--out") { const value = args[i + 1]; if (!value) return { ok: false, message: "Missing value for --out" }; @@ -159,53 +231,6 @@ function parseReportArgs(args: string[]): ParsedArgsResult { return { ok: true, options }; } -function serializeForecastResults( - results: ForecastAccountResult[], - liveQuotaByIndex: Map, - refreshFailures: Map, -): Array<{ - index: number; - label: string; - isCurrent: boolean; - availability: ForecastAccountResult["availability"]; - 
riskScore: number; - riskLevel: ForecastAccountResult["riskLevel"]; - waitMs: number; - reasons: string[]; - liveQuota?: { - status: number; - planType?: string; - activeLimit?: number; - model: string; - summary: string; - }; - refreshFailure?: TokenFailure; -}> { - return results.map((result) => { - const liveQuota = liveQuotaByIndex.get(result.index); - return { - index: result.index, - label: result.label, - isCurrent: result.isCurrent, - availability: result.availability, - riskScore: result.riskScore, - riskLevel: result.riskLevel, - waitMs: result.waitMs, - reasons: result.reasons, - liveQuota: liveQuota - ? { - status: liveQuota.status, - planType: liveQuota.planType, - activeLimit: liveQuota.activeLimit, - model: liveQuota.model, - summary: formatQuotaSnapshotLine(liveQuota), - } - : undefined, - refreshFailure: refreshFailures.get(result.index), - }; - }); -} - function inspectRequestedModel(requestedModel: string): ModelInspection { const normalized = resolveNormalizedModel(requestedModel); const profile = getModelProfile(normalized); @@ -259,77 +284,6 @@ async function defaultWriteFile(path: string, contents: string): Promise { } } -async function saveAccountsWithRetry( - storage: AccountStorageV3, - saveAccounts: ReportCommandDeps["saveAccounts"], -): Promise { - for (let attempt = 0; ; attempt += 1) { - try { - await saveAccounts(storage); - return; - } catch (error) { - if (!isRetryableWriteError(error) || attempt >= 3) { - throw error; - } - await sleep(10 * 2 ** attempt); - } - } -} - -type AccountIdentityMatch = Pick< - AccountMetadataV3, - "accountId" | "email" | "refreshToken" ->; -type RefreshedAccountPatch = Pick< - AccountMetadataV3, - "refreshToken" | "accessToken" | "expiresAt" -> & { - email?: AccountMetadataV3["email"]; - accountId?: AccountMetadataV3["accountId"]; - accountIdSource?: AccountMetadataV3["accountIdSource"]; -}; - -function applyRefreshedAccountPatch( - account: AccountMetadataV3, - patch: RefreshedAccountPatch, -): void { - 
account.refreshToken = patch.refreshToken; - account.accessToken = patch.accessToken; - account.expiresAt = patch.expiresAt; - if (patch.email) account.email = patch.email; - if (patch.accountId) { - account.accountId = patch.accountId; - account.accountIdSource = patch.accountIdSource; - } -} - -async function persistRefreshedAccountPatch( - storage: AccountStorageV3, - accountMatch: AccountIdentityMatch, - patch: RefreshedAccountPatch, - loadAccounts: ReportCommandDeps["loadAccounts"], - saveAccounts: ReportCommandDeps["saveAccounts"], -): Promise { - const latestStorage = (await loadAccounts()) ?? storage; - const nextStorage = structuredClone(latestStorage); - const targetIndex = - findMatchingAccountIndex(nextStorage.accounts, accountMatch, { - allowUniqueAccountIdFallbackWithoutEmail: true, - }) ?? - findMatchingAccountIndex(nextStorage.accounts, patch, { - allowUniqueAccountIdFallbackWithoutEmail: true, - }); - if (targetIndex === undefined) { - throw new Error("Unable to resolve refreshed account for persistence"); - } - const targetAccount = nextStorage.accounts[targetIndex]; - if (!targetAccount) { - throw new Error("Unable to resolve refreshed account for persistence"); - } - applyRefreshedAccountPatch(targetAccount, patch); - await saveAccountsWithRetry(nextStorage, saveAccounts); -} - export async function runReportCommand( args: string[], deps: ReportCommandDeps, @@ -354,22 +308,41 @@ export async function runReportCommand( deps.setStoragePath(null); const storagePath = deps.getStoragePath(); const storage = await deps.loadAccounts(); + const storageHealth = await deps.inspectStorageHealth?.(); const now = deps.getNow?.() ?? Date.now(); const accountCount = storage?.accounts.length ?? 0; const activeIndex = storage ? 
deps.resolveActiveIndex(storage, "codex") : 0; const refreshFailures = new Map(); const liveQuotaByIndex = new Map(); const probeErrors: string[] = []; + let consideredLiveAccounts = 0; + let executedLiveProbes = 0; if (storage && options.live) { for (let i = 0; i < storage.accounts.length; i += 1) { + if ( + typeof options.maxAccounts === "number" && + consideredLiveAccounts >= options.maxAccounts + ) { + probeErrors.push( + `live probe account budget reached (${options.maxAccounts})`, + ); + break; + } const account = storage.accounts[i]; if (!account || account.enabled === false) continue; + consideredLiveAccounts += 1; let probeAccessToken = account.accessToken; let probeAccountId = account.accountId ?? extractAccountId(account.accessToken); if (!deps.hasUsableAccessToken(account, now)) { + if (options.cachedOnly) { + probeErrors.push( + `${formatAccountLabel(account, i)}: skipped refresh because --cached-only is enabled`, + ); + continue; + } const refreshResult = await deps.queuedRefresh(account.refreshToken); if (refreshResult.type !== "success") { refreshFailures.set(i, { @@ -409,9 +382,6 @@ export async function runReportCommand( email: previousEmail, accountId: previousAccountId, }; - applyRefreshedAccountPatch(account, refreshPatch); - probeAccessToken = refreshResult.access; - probeAccountId = account.accountId ?? refreshedAccountId; if ( previousRefreshToken !== refreshPatch.refreshToken || previousAccessToken !== refreshPatch.accessToken || @@ -435,7 +405,11 @@ export async function runReportCommand( probeErrors.push(`${formatAccountLabel(account, i)}: ${message}`); continue; } + applyRefreshedAccountPatch(account, refreshPatch); } + probeAccessToken = refreshResult.access; + probeAccountId = + refreshPatch.accountId ?? account.accountId ?? 
refreshedAccountId; } if (!probeAccessToken || !probeAccountId) { @@ -444,8 +418,16 @@ export async function runReportCommand( ); continue; } + if ( + typeof options.maxProbes === "number" && + executedLiveProbes >= options.maxProbes + ) { + probeErrors.push(`live probe request budget reached (${options.maxProbes})`); + break; + } try { + executedLiveProbes += 1; const liveQuota = await deps.fetchCodexQuotaSnapshot({ accountId: probeAccountId, accessToken: probeAccessToken, @@ -497,6 +479,7 @@ export async function runReportCommand( command: "report", generatedAt: new Date(now).toISOString(), storagePath, + storageHealth, model: requestedModel, modelSelection: { requested: modelInspection.requested, @@ -506,6 +489,13 @@ export async function runReportCommand( capabilities: modelInspection.capabilities, }, liveProbe: options.live, + liveProbeBudget: { + cachedOnly: options.cachedOnly, + maxAccounts: options.maxAccounts ?? null, + maxProbes: options.maxProbes ?? null, + consideredAccounts: consideredLiveAccounts, + executedProbes: executedLiveProbes, + }, accounts: { total: accountCount, enabled: enabledCount, @@ -516,15 +506,30 @@ export async function runReportCommand( activeIndex: accountCount > 0 ? activeIndex + 1 : null, forecast: { summary: forecastSummary, - recommendation, + recommendation: { + ...recommendation, + selectedReason: + recommendation.recommendedIndex !== null + ? forecastResults[recommendation.recommendedIndex]?.reasons[0] ?? 
recommendation.reason + : recommendation.reason, + }, probeErrors, accounts: serializeForecastResults( forecastResults, liveQuotaByIndex, refreshFailures, + formatQuotaSnapshotLine, ), }, + runtime: await deps.loadRuntimeObservabilitySnapshot?.(), }; + if (report.forecast.recommendation.recommendedIndex !== null) { + const selectedIndex = report.forecast.recommendation.recommendedIndex; + const selected = report.forecast.accounts[selectedIndex]; + if (selected) { + selected.selected = true; + } + } const cwd = deps.getCwd?.() ?? process.cwd(); if (options.outPath) { @@ -542,7 +547,26 @@ export async function runReportCommand( logInfo(`Report generated at ${report.generatedAt}`); logInfo(`Storage: ${report.storagePath}`); + if (report.storageHealth) { + logInfo(`Storage health: ${report.storageHealth.state}`); + } logInfo(`Model: ${formatModelInspection(modelInspection)}`); + if (options.live) { + const budgetParts = [ + `considered ${consideredLiveAccounts} account(s)`, + `executed ${executedLiveProbes} probe(s)`, + ]; + if (typeof options.maxAccounts === "number") { + budgetParts.push(`max-accounts ${options.maxAccounts}`); + } + if (typeof options.maxProbes === "number") { + budgetParts.push(`max-probes ${options.maxProbes}`); + } + if (options.cachedOnly) { + budgetParts.push("cached-only"); + } + logInfo(`Live probe budget: ${budgetParts.join(", ")}`); + } logInfo( `Accounts: ${report.accounts.total} total (${report.accounts.enabled} enabled, ${report.accounts.disabled} disabled, ${report.accounts.coolingDown} cooling, ${report.accounts.rateLimited} rate-limited)`, ); @@ -565,6 +589,11 @@ export async function runReportCommand( if (report.forecast.probeErrors.length > 0) { logInfo(`Probe notes: ${report.forecast.probeErrors.length}`); } + if (report.runtime) { + logInfo( + `Runtime traffic: responses=${report.runtime.responsesRequests}, refresh=${report.runtime.authRefreshRequests}, probes=${report.runtime.diagnosticProbeRequests}`, + ); + } if 
(options.explain) { logInfo(""); for (const account of report.forecast.accounts) { diff --git a/lib/codex-manager/commands/status.ts b/lib/codex-manager/commands/status.ts index 4851795b..c0b4f8de 100644 --- a/lib/codex-manager/commands/status.ts +++ b/lib/codex-manager/commands/status.ts @@ -3,8 +3,13 @@ import { formatCooldown, formatWaitTime, } from "../../accounts.js"; +import { + evaluateForecastAccounts, + recommendForecastAccount, +} from "../../forecast.js"; import type { ModelFamily } from "../../prompts/codex.js"; -import type { AccountStorageV3 } from "../../storage.js"; +import type { RuntimeObservabilitySnapshot } from "../../runtime/runtime-observability.js"; +import type { AccountStorageV3, StorageHealthSummary } from "../../storage.js"; type LoadedStorage = AccountStorageV3 | null; @@ -21,6 +26,8 @@ export interface StatusCommandDeps { now: number, family: ModelFamily, ) => string | null; + loadRuntimeObservabilitySnapshot?: () => Promise; + inspectStorageHealth?: () => Promise; getNow?: () => number; logInfo?: (message: string) => void; } @@ -31,17 +38,71 @@ export async function runStatusCommand( deps.setStoragePath(null); const storage = await deps.loadAccounts(); const path = deps.getStoragePath(); + const storageHealth = await deps.inspectStorageHealth?.(); const logInfo = deps.logInfo ?? console.log; if (!storage || storage.accounts.length === 0) { - logInfo("No accounts configured."); + logInfo( + storageHealth?.state === "intentional-reset" + ? "No accounts configured. Storage was intentionally reset." + : storageHealth?.state === "recoverable" + ? "No accounts configured. Recovery artifacts are available." + : storageHealth?.state === "corrupt" + ? "No accounts configured. Storage appears corrupted." + : "No accounts configured.", + ); logInfo(`Storage: ${path}`); + if (storageHealth) { + logInfo(`Storage health: ${storageHealth.state}`); + } return 0; } const now = deps.getNow?.() ?? 
Date.now(); const activeIndex = deps.resolveActiveIndex(storage, "codex"); + const forecastResults = evaluateForecastAccounts( + storage.accounts.map((account, index) => ({ + index, + account, + isCurrent: index === activeIndex, + now, + })), + ); + const recommendation = recommendForecastAccount(forecastResults); logInfo(`Accounts (${storage.accounts.length})`); logInfo(`Storage: ${path}`); + if (recommendation.recommendedIndex !== null) { + logInfo( + `Selection reason: account ${recommendation.recommendedIndex + 1} (${recommendation.reason})`, + ); + } + if (storageHealth) { + logInfo(`Storage health: ${storageHealth.state}`); + } + const runtimeSnapshot = await deps.loadRuntimeObservabilitySnapshot?.(); + if (runtimeSnapshot) { + const runtimeMetrics = runtimeSnapshot.runtimeMetrics; + const poolCooldown = + typeof runtimeSnapshot.poolExhaustionCooldownUntil === "number" && + runtimeSnapshot.poolExhaustionCooldownUntil > now + ? formatWaitTime(runtimeSnapshot.poolExhaustionCooldownUntil - now) + : null; + const serverCooldown = + typeof runtimeSnapshot.serverBurstCooldownUntil === "number" && + runtimeSnapshot.serverBurstCooldownUntil > now + ? formatWaitTime(runtimeSnapshot.serverBurstCooldownUntil - now) + : null; + logInfo( + `Runtime: responses=${runtimeSnapshot.responsesRequests}, refresh=${runtimeSnapshot.authRefreshRequests}, probes=${runtimeSnapshot.diagnosticProbeRequests}, budgetExhaustions=${runtimeMetrics.requestAttemptBudgetExhaustions}`, + ); + if (poolCooldown || serverCooldown) { + logInfo( + `Cooldowns: pool=${poolCooldown ?? "none"}, server-burst=${serverCooldown ?? "none"}`, + ); + } + if (runtimeSnapshot.currentRequestId) { + logInfo(`Last request trace: ${runtimeSnapshot.currentRequestId}`); + } + } logInfo(""); for (let i = 0; i < storage.accounts.length; i += 1) { @@ -61,6 +122,10 @@ export async function runStatusCommand( ? `used ${formatWaitTime(now - account.lastUsed)} ago` : "never used"; logInfo(`${i + 1}. 
${label}${markerLabel} ${lastUsed}`); + const primaryReason = forecastResults[i]?.reasons[0]; + if (primaryReason) { + logInfo(` reason: ${primaryReason}`); + } } return 0; diff --git a/lib/codex-manager/forecast-report-shared.ts b/lib/codex-manager/forecast-report-shared.ts new file mode 100644 index 00000000..f9768d72 --- /dev/null +++ b/lib/codex-manager/forecast-report-shared.ts @@ -0,0 +1,140 @@ +import type { ForecastAccountResult } from "../forecast.js"; +import type { CodexQuotaSnapshot } from "../quota-probe.js"; +import { + findMatchingAccountIndex, + type AccountMetadataV3, + type AccountStorageV3, +} from "../storage.js"; +import type { TokenFailure } from "../types.js"; +import { sleep } from "../utils.js"; + +const RETRYABLE_STORAGE_WRITE_CODES = new Set(["EBUSY", "EPERM"]); + +export type AccountIdentityMatch = Pick< + AccountMetadataV3, + "accountId" | "email" | "refreshToken" +>; + +export type RefreshedAccountPatch = Pick< + AccountMetadataV3, + "refreshToken" | "accessToken" | "expiresAt" +> & { + email?: AccountMetadataV3["email"]; + accountId?: AccountMetadataV3["accountId"]; + accountIdSource?: AccountMetadataV3["accountIdSource"]; +}; + +export function isRetryableStorageWriteError(error: unknown): boolean { + const code = (error as NodeJS.ErrnoException | undefined)?.code; + return typeof code === "string" && RETRYABLE_STORAGE_WRITE_CODES.has(code); +} + +export async function saveAccountsWithRetry( + storage: AccountStorageV3, + saveAccounts: (storage: AccountStorageV3) => Promise, +): Promise { + for (let attempt = 0; ; attempt += 1) { + try { + await saveAccounts(storage); + return; + } catch (error) { + if (!isRetryableStorageWriteError(error) || attempt >= 3) { + throw error; + } + await sleep(10 * 2 ** attempt); + } + } +} + +export function applyRefreshedAccountPatch( + account: AccountMetadataV3, + patch: RefreshedAccountPatch, +): void { + account.refreshToken = patch.refreshToken; + account.accessToken = patch.accessToken; + 
account.expiresAt = patch.expiresAt; + if (patch.email) account.email = patch.email; + if (patch.accountId) { + account.accountId = patch.accountId; + account.accountIdSource = patch.accountIdSource; + } +} + +export async function persistRefreshedAccountPatch( + storage: AccountStorageV3, + accountMatch: AccountIdentityMatch, + patch: RefreshedAccountPatch, + loadAccounts: () => Promise, + saveAccounts: (storage: AccountStorageV3) => Promise, +): Promise { + const latestStorage = (await loadAccounts()) ?? storage; + const nextStorage = structuredClone(latestStorage); + const targetIndex = + findMatchingAccountIndex(nextStorage.accounts, accountMatch, { + allowUniqueAccountIdFallbackWithoutEmail: true, + }) ?? + findMatchingAccountIndex(nextStorage.accounts, patch, { + allowUniqueAccountIdFallbackWithoutEmail: true, + }); + if (targetIndex === undefined) { + throw new Error("Unable to resolve refreshed account for persistence"); + } + const targetAccount = nextStorage.accounts[targetIndex]; + if (!targetAccount) { + throw new Error("Unable to resolve refreshed account for persistence"); + } + applyRefreshedAccountPatch(targetAccount, patch); + await saveAccountsWithRetry(nextStorage, saveAccounts); +} + +export function serializeForecastResults( + results: ForecastAccountResult[], + liveQuotaByIndex: Map, + refreshFailures: Map, + formatQuotaSnapshotLine: (snapshot: CodexQuotaSnapshot) => string, +): Array<{ + index: number; + label: string; + isCurrent: boolean; + selected: boolean; + primaryReason?: string; + availability: ForecastAccountResult["availability"]; + riskScore: number; + riskLevel: ForecastAccountResult["riskLevel"]; + waitMs: number; + reasons: string[]; + liveQuota?: { + status: number; + planType?: string; + activeLimit?: number; + model: string; + summary: string; + }; + refreshFailure?: TokenFailure; +}> { + return results.map((result) => { + const liveQuota = liveQuotaByIndex.get(result.index); + return { + index: result.index, + label: 
result.label, + isCurrent: result.isCurrent, + selected: false, + primaryReason: result.reasons[0], + availability: result.availability, + riskScore: result.riskScore, + riskLevel: result.riskLevel, + waitMs: result.waitMs, + reasons: result.reasons, + liveQuota: liveQuota + ? { + status: liveQuota.status, + planType: liveQuota.planType, + activeLimit: liveQuota.activeLimit, + model: liveQuota.model, + summary: formatQuotaSnapshotLine(liveQuota), + } + : undefined, + refreshFailure: refreshFailures.get(result.index), + }; + }); +} diff --git a/lib/codex-manager/repair-commands.ts b/lib/codex-manager/repair-commands.ts index b139a6ff..079aa356 100644 --- a/lib/codex-manager/repair-commands.ts +++ b/lib/codex-manager/repair-commands.ts @@ -1687,7 +1687,14 @@ export async function runDoctor( try { const raw = await fs.readFile(codexAuthPath, "utf-8"); const parsed = JSON.parse(raw) as unknown; - if (parsed && typeof parsed === "object") { + if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) { + addCheck({ + key: "codex-auth-readable", + severity: "error", + message: "Codex auth file has invalid structure", + details: codexAuthPath, + }); + } else { const payload = parsed as Record; const tokens = payload.tokens && typeof payload.tokens === "object" ? payload.tokens as Record @@ -1708,19 +1715,19 @@ export async function runDoctor( emailFromFile ?? extractAccountEmail(accessToken, idToken), ); codexAuthAccountId = accountIdFromFile ?? extractAccountId(accessToken); + addCheck({ + key: "codex-auth-readable", + severity: "ok", + message: "Codex auth file is readable", + details: + codexAuthEmail || codexAuthAccountId + ? formatDoctorIdentitySummary({ + email: codexAuthEmail, + accountId: codexAuthAccountId, + }) + : undefined, + }); } - addCheck({ - key: "codex-auth-readable", - severity: "ok", - message: "Codex auth file is readable", - details: - codexAuthEmail || codexAuthAccountId - ? 
formatDoctorIdentitySummary({ - email: codexAuthEmail, - accountId: codexAuthAccountId, - }) - : undefined, - }); } catch (error) { addCheck({ key: "codex-auth-readable", diff --git a/lib/config.ts b/lib/config.ts index fbbf932f..3b567788 100644 --- a/lib/config.ts +++ b/lib/config.ts @@ -166,9 +166,9 @@ export const DEFAULT_PLUGIN_CONFIG: PluginConfig = { fastSession: false, fastSessionStrategy: "hybrid", fastSessionMaxInputItems: 30, - retryAllAccountsRateLimited: true, + retryAllAccountsRateLimited: false, retryAllAccountsMaxWaitMs: 0, - retryAllAccountsMaxRetries: Infinity, + retryAllAccountsMaxRetries: 0, unsupportedCodexPolicy: "strict", fallbackOnUnsupportedCodexModel: false, fallbackToGpt52OnUnsupportedGpt53: true, @@ -183,6 +183,10 @@ export const DEFAULT_PLUGIN_CONFIG: PluginConfig = { parallelProbingMaxConcurrency: 2, emptyResponseMaxRetries: 2, emptyResponseRetryDelayMs: 1_000, + rateLimitDedupWindowMs: 2_000, + rateLimitStateResetMs: 120_000, + rateLimitMaxBackoffMs: 60_000, + rateLimitShortRetryThresholdMs: 5_000, pidOffsetEnabled: false, fetchTimeoutMs: 60_000, streamStallTimeoutMs: 45_000, @@ -873,7 +877,7 @@ export function getRetryAllAccountsRateLimited( return resolveBooleanSetting( "CODEX_AUTH_RETRY_ALL_RATE_LIMITED", pluginConfig.retryAllAccountsRateLimited, - true, + false, ); } @@ -894,7 +898,7 @@ export function getRetryAllAccountsMaxRetries( return resolveNumberSetting( "CODEX_AUTH_RETRY_ALL_MAX_RETRIES", pluginConfig.retryAllAccountsMaxRetries, - Infinity, + 0, { min: 0 }, ); } @@ -1076,6 +1080,44 @@ export function getEmptyResponseRetryDelayMs( ); } +export function getRateLimitDedupWindowMs(pluginConfig: PluginConfig): number { + return resolveNumberSetting( + "CODEX_AUTH_RATE_LIMIT_DEDUP_WINDOW_MS", + pluginConfig.rateLimitDedupWindowMs, + DEFAULT_PLUGIN_CONFIG.rateLimitDedupWindowMs ?? 
2_000, + { min: 0 }, + ); +} + +export function getRateLimitStateResetMs(pluginConfig: PluginConfig): number { + return resolveNumberSetting( + "CODEX_AUTH_RATE_LIMIT_STATE_RESET_MS", + pluginConfig.rateLimitStateResetMs, + DEFAULT_PLUGIN_CONFIG.rateLimitStateResetMs ?? 120_000, + { min: 1_000 }, + ); +} + +export function getRateLimitMaxBackoffMs(pluginConfig: PluginConfig): number { + return resolveNumberSetting( + "CODEX_AUTH_RATE_LIMIT_MAX_BACKOFF_MS", + pluginConfig.rateLimitMaxBackoffMs, + DEFAULT_PLUGIN_CONFIG.rateLimitMaxBackoffMs ?? 60_000, + { min: 1_000 }, + ); +} + +export function getRateLimitShortRetryThresholdMs( + pluginConfig: PluginConfig, +): number { + return resolveNumberSetting( + "CODEX_AUTH_RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS", + pluginConfig.rateLimitShortRetryThresholdMs, + DEFAULT_PLUGIN_CONFIG.rateLimitShortRetryThresholdMs ?? 5_000, + { min: 0 }, + ); +} + export function getPidOffsetEnabled(pluginConfig: PluginConfig): boolean { return resolveBooleanSetting( "CODEX_AUTH_PID_OFFSET_ENABLED", @@ -1678,6 +1720,26 @@ const CONFIG_EXPLAIN_ENTRIES: ConfigExplainMeta[] = [ envNames: ["CODEX_AUTH_EMPTY_RESPONSE_RETRY_DELAY_MS"], getValue: getEmptyResponseRetryDelayMs, }, + { + key: "rateLimitDedupWindowMs", + envNames: ["CODEX_AUTH_RATE_LIMIT_DEDUP_WINDOW_MS"], + getValue: getRateLimitDedupWindowMs, + }, + { + key: "rateLimitStateResetMs", + envNames: ["CODEX_AUTH_RATE_LIMIT_STATE_RESET_MS"], + getValue: getRateLimitStateResetMs, + }, + { + key: "rateLimitMaxBackoffMs", + envNames: ["CODEX_AUTH_RATE_LIMIT_MAX_BACKOFF_MS"], + getValue: getRateLimitMaxBackoffMs, + }, + { + key: "rateLimitShortRetryThresholdMs", + envNames: ["CODEX_AUTH_RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS"], + getValue: getRateLimitShortRetryThresholdMs, + }, { key: "pidOffsetEnabled", envNames: ["CODEX_AUTH_PID_OFFSET_ENABLED"], @@ -1781,10 +1843,11 @@ export function getPluginConfigExplainReport(): ConfigExplainReport { const storedRecord = stored.record ?? 
null; const entries = CONFIG_EXPLAIN_ENTRIES.map((entry) => { const value = entry.getValue(pluginConfig); + const defaultValue = DEFAULT_PLUGIN_CONFIG[entry.key]; return { key: entry.key, value: normalizeConfigExplainValue(value), - defaultValue: normalizeConfigExplainValue(DEFAULT_PLUGIN_CONFIG[entry.key]), + defaultValue: normalizeConfigExplainValue(defaultValue), source: resolveConfigExplainSource( entry, pluginConfig, diff --git a/lib/index.ts b/lib/index.ts index 0e259846..8d89351d 100644 --- a/lib/index.ts +++ b/lib/index.ts @@ -8,7 +8,17 @@ export * from "./auth/auth.js"; export * from "./request/fetch-helpers.js"; export * from "./request/request-transformer.js"; export * from "./request/response-handler.js"; -export * from "./request/rate-limit-backoff.js"; +export { + MAX_SHORT_RETRY_ATTEMPTS, + calculateBackoffMs, + clearRateLimitBackoffState, + configureRateLimitBackoff, + getRateLimitBackoff, + getRateLimitBackoffWithReason, + getRateLimitShortRetryThresholdMs as getConfiguredRateLimitShortRetryThresholdMs, + resetRateLimitBackoff, + resetRateLimitBackoffConfig, +} from "./request/rate-limit-backoff.js"; export * from "./prompts/codex.js"; export * from "./shutdown.js"; export * from "./circuit-breaker.js"; diff --git a/lib/parallel-probe.ts b/lib/parallel-probe.ts index b4c4e1cb..44ba3755 100644 --- a/lib/parallel-probe.ts +++ b/lib/parallel-probe.ts @@ -38,6 +38,34 @@ export interface GetTopCandidatesParams { maxCandidates: number; } +function isAccountManager(value: unknown): value is AccountManager { + return ( + typeof value === "object" && + value !== null && + "getAccountsSnapshot" in value && + typeof value.getAccountsSnapshot === "function" + ); +} + +function isGetTopCandidatesParams(value: unknown): value is GetTopCandidatesParams { + return ( + typeof value === "object" && + value !== null && + "accountManager" in value && + isAccountManager(value.accountManager) && + "modelFamily" in value && + typeof value.modelFamily === "string" && + 
"model" in value && + (typeof value.model === "string" || value.model === null) && + "maxCandidates" in value && + typeof value.maxCandidates === "number" + ); +} + +function toProbeError(error: unknown): Error { + return error instanceof Error ? error : new Error(String(error)); +} + /** * Get top N candidates ranked by hybrid score WITHOUT mutating AccountManager state. * Uses getAccountsSnapshot() and ranks by health + tokens + freshness. @@ -64,13 +92,19 @@ export function getTopCandidates( let resolvedMaxCandidates: number | undefined; if (useNamedParams) { - const namedParams = accountManagerOrParams as GetTopCandidatesParams; + if (!isGetTopCandidatesParams(accountManagerOrParams)) { + throw new TypeError("getTopCandidates requires accountManager"); + } + const namedParams = accountManagerOrParams; resolvedAccountManager = namedParams.accountManager; resolvedModelFamily = namedParams.modelFamily; resolvedModel = namedParams.model; resolvedMaxCandidates = namedParams.maxCandidates; } else { - resolvedAccountManager = accountManagerOrParams as AccountManager; + if (!isAccountManager(accountManagerOrParams)) { + throw new TypeError("getTopCandidates requires accountManager"); + } + resolvedAccountManager = accountManagerOrParams; resolvedModelFamily = modelFamily; resolvedModel = model; resolvedMaxCandidates = maxCandidates; @@ -158,7 +192,7 @@ export async function probeAccountsInParallel( const response = await probeFn(account, controller.signal); return { type: "success", account, response }; } catch (error) { - return { type: "failure", account, error: error as Error }; + return { type: "failure", account, error: toProbeError(error) }; } } diff --git a/lib/preemptive-quota-scheduler.ts b/lib/preemptive-quota-scheduler.ts index 605c4831..cfee86ab 100644 --- a/lib/preemptive-quota-scheduler.ts +++ b/lib/preemptive-quota-scheduler.ts @@ -238,14 +238,27 @@ export class PreemptiveQuotaScheduler { const snapshot = this.snapshots.get(key); if (!snapshot) return { 
defer: false, waitMs: 0 }; + const primaryWait = + typeof snapshot.primary.resetAtMs === "number" && + Number.isFinite(snapshot.primary.resetAtMs) && + snapshot.primary.resetAtMs > now + ? snapshot.primary.resetAtMs - now + : 0; + const secondaryWait = + typeof snapshot.secondary.resetAtMs === "number" && + Number.isFinite(snapshot.secondary.resetAtMs) && + snapshot.secondary.resetAtMs > now + ? snapshot.secondary.resetAtMs - now + : 0; + const waitCandidates = [snapshot.primary.resetAtMs, snapshot.secondary.resetAtMs] .filter((value): value is number => typeof value === "number" && Number.isFinite(value) && value > now) .map((value) => value - now) .filter((value) => value > 0); - const nearestWait = waitCandidates.length > 0 ? Math.min(...waitCandidates) : 0; + const longestWait = waitCandidates.length > 0 ? Math.max(...waitCandidates) : 0; - if (snapshot.status === 429 && nearestWait > 0) { - const bounded = Math.min(nearestWait, this.maxDeferralMs); + if (snapshot.status === 429 && longestWait > 0) { + const bounded = Math.min(longestWait, this.maxDeferralMs); if (bounded > 0) { return { defer: true, waitMs: bounded, reason: "rate-limit" }; } @@ -259,9 +272,12 @@ export class PreemptiveQuotaScheduler { typeof snapshot.secondary.usedPercent === "number" && Number.isFinite(snapshot.secondary.usedPercent) && snapshot.secondary.usedPercent >= 100 - this.secondaryRemainingPercentThreshold; - const nearExhausted = primaryNearExhausted || secondaryNearExhausted; - if (nearExhausted && nearestWait > 0) { - const bounded = Math.min(nearestWait, this.maxDeferralMs); + const nearExhaustedWait = Math.max( + primaryNearExhausted ? primaryWait : 0, + secondaryNearExhausted ? 
secondaryWait : 0, + ); + if (nearExhaustedWait > 0) { + const bounded = Math.min(nearExhaustedWait, this.maxDeferralMs); if (bounded > 0) { return { defer: true, waitMs: bounded, reason: "quota-near-exhaustion" }; } diff --git a/lib/proactive-refresh.ts b/lib/proactive-refresh.ts index 4e5d4a4b..c718dba6 100644 --- a/lib/proactive-refresh.ts +++ b/lib/proactive-refresh.ts @@ -20,6 +20,7 @@ import { sanitizeEmail, shouldUpdateAccountIdFromToken, } from "./auth/token-utils.js"; +import { mutateRuntimeObservabilitySnapshot } from "./runtime/runtime-observability.js"; const log = createLogger("proactive-refresh"); @@ -28,6 +29,7 @@ export const DEFAULT_PROACTIVE_BUFFER_MS = 5 * 60 * 1000; /** Minimum buffer to prevent unnecessary refreshes (30 seconds) */ export const MIN_PROACTIVE_BUFFER_MS = 30 * 1000; +const PROACTIVE_REFRESH_STAGGER_MS = 250; /** * Result of a proactive refresh operation. @@ -113,6 +115,10 @@ export async function proactiveRefreshAccount( expiresInMinutes: Math.round(timeUntilExpiry / 60000), }); + mutateRuntimeObservabilitySnapshot((snapshot) => { + snapshot.authRefreshRequests += 1; + snapshot.runtimeMetrics.authRefreshRequests += 1; + }); const result = await queuedRefresh(account.refreshToken); if (result.type === "success") { @@ -158,14 +164,17 @@ export async function refreshExpiringAccounts( log.info(`Proactively refreshing ${accountsToRefresh.length} account(s)`); - // Refresh in parallel for efficiency - const refreshPromises = accountsToRefresh.map(async (account) => { + const outcomes: Array<{ index: number; result: ProactiveRefreshResult }> = []; + for (let index = 0; index < accountsToRefresh.length; index += 1) { + const account = accountsToRefresh[index]; + if (!account) continue; + if (index > 0) { + await new Promise((resolve) => setTimeout(resolve, PROACTIVE_REFRESH_STAGGER_MS)); + } const result = await proactiveRefreshAccount(account, bufferMs); await onResult?.(account, result); - return { index: account.index, result }; - }); 
- - const outcomes = await Promise.all(refreshPromises); + outcomes.push({ index: account.index, result }); + } for (const { index, result } of outcomes) { results.set(index, result); diff --git a/lib/quota-cache.ts b/lib/quota-cache.ts index 9870a2b6..82263dc4 100644 --- a/lib/quota-cache.ts +++ b/lib/quota-cache.ts @@ -33,6 +33,7 @@ interface QuotaCacheFile { const QUOTA_CACHE_PATH = join(getCodexMultiAuthDir(), "quota-cache.json"); const QUOTA_CACHE_LABEL = basename(QUOTA_CACHE_PATH); const RETRYABLE_FS_CODES = new Set(["EBUSY", "EPERM"]); +let quotaCacheWriteQueue: Promise = Promise.resolve(); function isRetryableFsError(error: unknown): boolean { const code = (error as NodeJS.ErrnoException | undefined)?.code; @@ -223,39 +224,48 @@ export async function saveQuotaCache(data: QuotaCacheData): Promise { byEmail: data.byEmail, }; - try { - await fs.mkdir(getCodexMultiAuthDir(), { recursive: true }); - const tempPath = `${QUOTA_CACHE_PATH}.${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2, 8)}.tmp`; - await fs.writeFile(tempPath, `${JSON.stringify(payload, null, 2)}\n`, { - encoding: "utf8", - mode: 0o600, - }); - let renamed = false; + const writeTask = async (): Promise => { try { - for (let attempt = 0; attempt < 5; attempt += 1) { - try { - await fs.rename(tempPath, QUOTA_CACHE_PATH); - renamed = true; - break; - } catch (error) { - if (!isRetryableFsError(error) || attempt >= 4) throw error; - await sleep(10 * 2 ** attempt); + await fs.mkdir(getCodexMultiAuthDir(), { recursive: true }); + const tempPath = `${QUOTA_CACHE_PATH}.${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2, 8)}.tmp`; + await fs.writeFile(tempPath, `${JSON.stringify(payload, null, 2)}\n`, { + encoding: "utf8", + mode: 0o600, + }); + let renamed = false; + try { + for (let attempt = 0; attempt < 5; attempt += 1) { + try { + await fs.rename(tempPath, QUOTA_CACHE_PATH); + renamed = true; + break; + } catch (error) { + if (!isRetryableFsError(error) || attempt >= 
4) throw error; + await sleep(10 * 2 ** attempt); + } } - } - } finally { - if (!renamed) { - try { - await fs.unlink(tempPath); - } catch { - // Best effort temp cleanup. + } finally { + if (!renamed) { + try { + await fs.unlink(tempPath); + } catch { + // Best effort temp cleanup. + } } } + } catch (error) { + logWarn( + `Failed to save quota cache to ${QUOTA_CACHE_LABEL}: ${ + error instanceof Error ? error.message : String(error) + }`, + ); } - } catch (error) { - logWarn( - `Failed to save quota cache to ${QUOTA_CACHE_LABEL}: ${ - error instanceof Error ? error.message : String(error) - }`, - ); - } + }; + + const queued = quotaCacheWriteQueue.catch(() => undefined).then(writeTask); + quotaCacheWriteQueue = queued.then( + () => undefined, + () => undefined, + ); + await queued; } diff --git a/lib/quota-probe.ts b/lib/quota-probe.ts index 9535b7f9..bf73c29c 100644 --- a/lib/quota-probe.ts +++ b/lib/quota-probe.ts @@ -1,6 +1,7 @@ import { CODEX_BASE_URL } from "./constants.js"; import { createCodexHeaders, getUnsupportedCodexModelInfo } from "./request/fetch-helpers.js"; import { getCodexInstructions } from "./prompts/codex.js"; +import { mutateRuntimeObservabilitySnapshot } from "./runtime/runtime-observability.js"; import type { RequestBody } from "./types.js"; import { isRecord } from "./utils.js"; @@ -359,6 +360,10 @@ export async function fetchCodexQuotaSnapshot( const timeout = setTimeout(() => controller.abort(), timeoutMs); let response: Response; try { + mutateRuntimeObservabilitySnapshot((snapshot) => { + snapshot.diagnosticProbeRequests += 1; + snapshot.runtimeMetrics.diagnosticProbeRequests += 1; + }); response = await fetch(`${CODEX_BASE_URL}/codex/responses`, { method: "POST", headers, diff --git a/lib/request/failure-policy.ts b/lib/request/failure-policy.ts index 28572345..e8fe8789 100644 --- a/lib/request/failure-policy.ts +++ b/lib/request/failure-policy.ts @@ -135,7 +135,7 @@ export function evaluateFailurePolicy( markRateLimited: false, 
removeAccount: false, cooldownMs, - cooldownReason: cooldownMs > 0 ? "network-error" : undefined, + cooldownReason: cooldownMs > 0 ? "server-error" : undefined, retrySameAccount, retryDelayMs: retrySameAccount ? 500 : undefined, handoffStrategy: "hard", diff --git a/lib/request/fetch-helpers.ts b/lib/request/fetch-helpers.ts index 6db96bd1..7e39b6e9 100644 --- a/lib/request/fetch-helpers.ts +++ b/lib/request/fetch-helpers.ts @@ -978,24 +978,40 @@ async function safeReadBody(response: Response): Promise { } function mapUsageLimit404WithBody(response: Response, bodyText: string): Response | null { - if (response.status !== HTTP_STATUS.NOT_FOUND) return null; - if (!bodyText) return null; + if (response.status !== HTTP_STATUS.NOT_FOUND) return null; + if (!bodyText) return null; let code = ""; + let type = ""; try { - const parsed = JSON.parse(bodyText) as { error?: { code?: string | number; type?: string } }; - code = (parsed?.error?.code ?? parsed?.error?.type ?? "").toString(); + const parsed = JSON.parse(bodyText) as { + error?: { code?: string | number; type?: string | number }; + }; + code = (parsed?.error?.code ?? "").toString(); + type = (parsed?.error?.type ?? "").toString(); } catch { code = ""; + type = ""; } + const normalizedSignals = [code, type] + .map((value) => value.toLowerCase()) + .filter((value) => value.length > 0); + // Check for entitlement errors first - these should NOT be treated as rate limits - if (isEntitlementError(code, bodyText)) { + if (isEntitlementError(normalizedSignals.join(" "), bodyText)) { return createEntitlementErrorResponse(bodyText); } - const haystack = `${code} ${bodyText}`.toLowerCase(); - if (!/usage_limit_reached|rate_limit_exceeded|usage limit/i.test(haystack)) { + // Only structured quota-limit codes should be remapped from 404 to 429. + // Free-text 404 bodies remain untouched, but known quota/rate-limit codes + // should still preserve retry semantics for callers. 
+ if ( + !normalizedSignals.some( + (value) => + value.includes("usage_limit") || value.includes("rate_limit_exceeded"), + ) + ) { return null; } diff --git a/lib/request/rate-limit-backoff.ts b/lib/request/rate-limit-backoff.ts index 9b528b6e..a4030841 100644 --- a/lib/request/rate-limit-backoff.ts +++ b/lib/request/rate-limit-backoff.ts @@ -1,4 +1,5 @@ import type { RateLimitReason } from "../accounts.js"; +import { DEFAULT_PLUGIN_CONFIG } from "../config.js"; export interface RateLimitBackoffResult { attempt: number; @@ -14,29 +15,123 @@ export interface RateLimitBackoffResult { * - Deduplicate concurrent 429s so parallel requests don't over-increment backoff. * - Reset backoff after a quiet period. */ -const RATE_LIMIT_DEDUP_WINDOW_MS = 2000; -const RATE_LIMIT_STATE_RESET_MS = 120_000; -const MAX_BACKOFF_MS = 60_000; +const DEFAULT_RATE_LIMIT_DEDUP_WINDOW_MS = + DEFAULT_PLUGIN_CONFIG.rateLimitDedupWindowMs ?? 2_000; +const DEFAULT_RATE_LIMIT_STATE_RESET_MS = + DEFAULT_PLUGIN_CONFIG.rateLimitStateResetMs ?? 120_000; +const DEFAULT_MAX_BACKOFF_MS = + DEFAULT_PLUGIN_CONFIG.rateLimitMaxBackoffMs ?? 60_000; +const DEFAULT_RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS = + DEFAULT_PLUGIN_CONFIG.rateLimitShortRetryThresholdMs ?? 
5_000; +const RATE_LIMIT_BACKOFF_JITTER_FACTOR = 0.2; -export const RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS = 5000; +interface RateLimitBackoffConfig { + dedupWindowMs: number; + stateResetMs: number; + maxBackoffMs: number; + shortRetryThresholdMs: number; +} + +type StableAccountKey = string | null | undefined; + +let rateLimitBackoffConfig: RateLimitBackoffConfig = { + dedupWindowMs: DEFAULT_RATE_LIMIT_DEDUP_WINDOW_MS, + stateResetMs: DEFAULT_RATE_LIMIT_STATE_RESET_MS, + maxBackoffMs: DEFAULT_MAX_BACKOFF_MS, + shortRetryThresholdMs: DEFAULT_RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS, +}; + +export function configureRateLimitBackoff( + overrides: Partial = {}, +): void { + if ( + typeof overrides.dedupWindowMs === "number" && + Number.isFinite(overrides.dedupWindowMs) + ) { + rateLimitBackoffConfig.dedupWindowMs = Math.max(0, Math.floor(overrides.dedupWindowMs)); + } + if ( + typeof overrides.stateResetMs === "number" && + Number.isFinite(overrides.stateResetMs) + ) { + rateLimitBackoffConfig.stateResetMs = Math.max(1_000, Math.floor(overrides.stateResetMs)); + } + if ( + typeof overrides.maxBackoffMs === "number" && + Number.isFinite(overrides.maxBackoffMs) + ) { + rateLimitBackoffConfig.maxBackoffMs = Math.max(1_000, Math.floor(overrides.maxBackoffMs)); + } + if ( + typeof overrides.shortRetryThresholdMs === "number" && + Number.isFinite(overrides.shortRetryThresholdMs) + ) { + rateLimitBackoffConfig.shortRetryThresholdMs = Math.max( + 0, + Math.floor(overrides.shortRetryThresholdMs), + ); + } +} + +export function resetRateLimitBackoffConfig(): void { + rateLimitBackoffConfig = { + dedupWindowMs: DEFAULT_RATE_LIMIT_DEDUP_WINDOW_MS, + stateResetMs: DEFAULT_RATE_LIMIT_STATE_RESET_MS, + maxBackoffMs: DEFAULT_MAX_BACKOFF_MS, + shortRetryThresholdMs: DEFAULT_RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS, + }; +} + +export function getRateLimitShortRetryThresholdMs(): number { + return rateLimitBackoffConfig.shortRetryThresholdMs; +} + +/** + * Maximum number of consecutive 
short-cooldown 429 retries before + * falling through to the long-cooldown rotation path. + * + * Without this bound, an upstream that perpetually returns short + * Retry-After values (≤ RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS) would + * keep the request loop spinning on the same account indefinitely. + */ +export const MAX_SHORT_RETRY_ATTEMPTS = 3; interface RateLimitState { consecutive429: number; lastAt: number; quotaKey: string; + lastDelayMs: number; } const rateLimitStateByAccountQuota = new Map(); +function resolveRateLimitStateKey( + accountIndex: number, + quotaKey: string, + stableAccountKey?: StableAccountKey, +): string { + const normalizedStableAccountKey = stableAccountKey?.trim(); + const accountStateKey = + normalizedStableAccountKey && normalizedStableAccountKey.length > 0 + ? normalizedStableAccountKey + : `slot:${accountIndex}`; + return `${accountStateKey}:${quotaKey}`; +} + function normalizeDelayMs(value: number | null | undefined, fallback: number): number { const candidate = typeof value === "number" && Number.isFinite(value) ? 
value : fallback; return Math.max(0, Math.floor(candidate)); } +function addBackoffJitter(baseMs: number): number { + const jitter = baseMs * RATE_LIMIT_BACKOFF_JITTER_FACTOR * (Math.random() * 2 - 1); + return Math.max(0, Math.floor(baseMs + jitter)); +} + function pruneStaleRateLimitState(): void { const now = Date.now(); for (const [key, state] of rateLimitStateByAccountQuota) { - if (now - state.lastAt > RATE_LIMIT_STATE_RESET_MS) { + if (now - state.lastAt > rateLimitBackoffConfig.stateResetMs) { rateLimitStateByAccountQuota.delete(key); } } @@ -49,47 +144,62 @@ export function getRateLimitBackoff( accountIndex: number, quotaKey: string, serverRetryAfterMs: number | null | undefined, + stableAccountKey?: StableAccountKey, ): RateLimitBackoffResult { pruneStaleRateLimitState(); const now = Date.now(); - const stateKey = `${accountIndex}:${quotaKey}`; + const stateKey = resolveRateLimitStateKey( + accountIndex, + quotaKey, + stableAccountKey, + ); const previous = rateLimitStateByAccountQuota.get(stateKey); const baseDelay = normalizeDelayMs(serverRetryAfterMs, 1000); - if (previous && now - previous.lastAt < RATE_LIMIT_DEDUP_WINDOW_MS) { - const backoffDelay = Math.min( - baseDelay * Math.pow(2, previous.consecutive429 - 1), - MAX_BACKOFF_MS, - ); + if (previous && now - previous.lastAt < rateLimitBackoffConfig.dedupWindowMs) { return { attempt: previous.consecutive429, - delayMs: Math.max(baseDelay, backoffDelay), + delayMs: previous.lastDelayMs, isDuplicate: true, }; } const attempt = - previous && now - previous.lastAt < RATE_LIMIT_STATE_RESET_MS + previous && now - previous.lastAt < rateLimitBackoffConfig.stateResetMs ? 
previous.consecutive429 + 1 : 1; + const backoffDelay = Math.min( + baseDelay * Math.pow(2, attempt - 1), + rateLimitBackoffConfig.maxBackoffMs, + ); + const jitteredDelay = Math.min( + addBackoffJitter(backoffDelay), + rateLimitBackoffConfig.maxBackoffMs, + ); + const delayMs = Math.max(baseDelay, jitteredDelay); rateLimitStateByAccountQuota.set(stateKey, { consecutive429: attempt, lastAt: now, quotaKey, + lastDelayMs: delayMs, }); - - const backoffDelay = Math.min(baseDelay * Math.pow(2, attempt - 1), MAX_BACKOFF_MS); return { attempt, - delayMs: Math.max(baseDelay, backoffDelay), + delayMs, isDuplicate: false, }; } -export function resetRateLimitBackoff(accountIndex: number, quotaKey: string): void { - rateLimitStateByAccountQuota.delete(`${accountIndex}:${quotaKey}`); +export function resetRateLimitBackoff( + accountIndex: number, + quotaKey: string, + stableAccountKey?: StableAccountKey, +): void { + rateLimitStateByAccountQuota.delete( + resolveRateLimitStateKey(accountIndex, quotaKey, stableAccountKey), + ); } export function clearRateLimitBackoffState(): void { @@ -110,7 +220,10 @@ export function calculateBackoffMs( ): number { const multiplier = BACKOFF_MULTIPLIERS[reason] ?? 
1.0; const exponentialDelay = baseDelayMs * Math.pow(2, attempt - 1); - return Math.min(Math.floor(exponentialDelay * multiplier), MAX_BACKOFF_MS); + return Math.min( + Math.floor(exponentialDelay * multiplier), + rateLimitBackoffConfig.maxBackoffMs, + ); } export interface RateLimitBackoffWithReasonParams { @@ -118,6 +231,7 @@ export interface RateLimitBackoffWithReasonParams { quotaKey: string; serverRetryAfterMs: number | null | undefined; reason?: RateLimitReason; + stableAccountKey?: StableAccountKey; } export function getRateLimitBackoffWithReason( @@ -128,12 +242,14 @@ export function getRateLimitBackoffWithReason( quotaKey: string, serverRetryAfterMs: number | null | undefined, reason?: RateLimitReason, + stableAccountKey?: StableAccountKey, ): RateLimitBackoffResult; export function getRateLimitBackoffWithReason( accountIndexOrParams: number | RateLimitBackoffWithReasonParams, quotaKey?: string, serverRetryAfterMs?: number | null | undefined, reason: RateLimitReason = "unknown", + stableAccountKey?: StableAccountKey, ): RateLimitBackoffResult { const useNamedParams = typeof accountIndexOrParams !== "number"; const resolvedAccountIndex = useNamedParams @@ -148,6 +264,9 @@ export function getRateLimitBackoffWithReason( const resolvedReason = useNamedParams ? (accountIndexOrParams.reason ?? "unknown") : reason; + const resolvedStableAccountKey = useNamedParams + ? accountIndexOrParams.stableAccountKey + : stableAccountKey; if (!Number.isInteger(resolvedAccountIndex) || resolvedAccountIndex < 0) { throw new TypeError( "getRateLimitBackoffWithReason requires a non-negative integer accountIndex", @@ -161,9 +280,13 @@ export function getRateLimitBackoffWithReason( resolvedAccountIndex, normalizedQuotaKey, resolvedServerRetryAfterMs, + resolvedStableAccountKey, ); + const normalizedBaseDelay = normalizeDelayMs(resolvedServerRetryAfterMs, 1000); const adjustedDelay = calculateBackoffMs( - result.delayMs, + result.attempt === 1 && !result.isDuplicate + ? 
normalizedBaseDelay + : result.delayMs, result.attempt, resolvedReason, ); diff --git a/lib/request/request-attempt-budget.ts b/lib/request/request-attempt-budget.ts new file mode 100644 index 00000000..af6b895f --- /dev/null +++ b/lib/request/request-attempt-budget.ts @@ -0,0 +1,84 @@ +const MAX_TOTAL_OUTBOUND_REQUEST_ATTEMPTS = 6; +const MAX_STREAM_FAILOVERS = 1; +const MAX_STREAM_FAILOVER_CANDIDATES = 2; + +/** + * Clamp configured stream failover retries to the conservative runtime cap. + */ +export function capStreamFailoverMax(value: number): number { + return Math.max( + 0, + Math.min(MAX_STREAM_FAILOVERS, Math.floor(Number.isFinite(value) ? value : 0)), + ); +} + +/** + * Compute a finite per-request budget that bounds all outbound Responses API + * fetches across account rotation, same-account retries, empty-response + * retries, and stream failover. + */ +export function computeOutboundRequestAttemptBudget(params: { + accountCount: number; + maxSameAccountRetries: number; + emptyResponseMaxRetries: number; + streamFailoverMax: number; +}): number { + const accountCount = Math.max( + 1, + Math.floor(Number.isFinite(params.accountCount) ? params.accountCount : 1), + ); + const maxSameAccountRetries = Math.max( + 0, + Math.floor( + Number.isFinite(params.maxSameAccountRetries) + ? params.maxSameAccountRetries + : 0, + ), + ); + const emptyResponseMaxRetries = Math.max( + 0, + Math.floor( + Number.isFinite(params.emptyResponseMaxRetries) + ? params.emptyResponseMaxRetries + : 0, + ), + ); + const streamFailoverMax = capStreamFailoverMax(params.streamFailoverMax); + + return Math.max( + 1, + Math.min( + accountCount + + maxSameAccountRetries + + emptyResponseMaxRetries + + streamFailoverMax, + MAX_TOTAL_OUTBOUND_REQUEST_ATTEMPTS, + ), + ); +} + +/** + * Build the ordered stream-failover candidate list for a request. + * + * The caller is expected to pass a valid primary account index from the + * current account snapshot. 
This helper keeps the primary first and adds at + * most one alternate account to avoid broad replay fan-out. + */ +export function buildStreamFailoverCandidateOrder( + primaryIndex: number, + accountIndices: number[], +): number[] { + const order: number[] = [primaryIndex]; + + for (const index of accountIndices) { + if (!Number.isFinite(index) || index === primaryIndex || order.includes(index)) { + continue; + } + order.push(index); + if (order.length >= MAX_STREAM_FAILOVER_CANDIDATES) { + break; + } + } + + return order; +} diff --git a/lib/request/request-resilience.ts b/lib/request/request-resilience.ts new file mode 100644 index 00000000..e88dddcd --- /dev/null +++ b/lib/request/request-resilience.ts @@ -0,0 +1,109 @@ +import type { ManagedAccount } from "../accounts.js"; + +const POOL_EXHAUSTION_COOLDOWN_MS = 15_000; +const SERVER_BURST_COOLDOWN_MS = 10_000; +const SERVER_BURST_THRESHOLD = 3; + +type ServerBurstState = { + windowStartedAt: number; + accountIndices: Set; + cooldownUntil: number | null; +}; + +let poolExhaustionCooldownUntil: number | null = null; +let serverBurstState: ServerBurstState = { + windowStartedAt: 0, + accountIndices: new Set(), + cooldownUntil: null, +}; + +export function getPoolExhaustionCooldownRemaining(now = Date.now()): number { + if (!poolExhaustionCooldownUntil || poolExhaustionCooldownUntil <= now) { + return 0; + } + return poolExhaustionCooldownUntil - now; +} + +export function armPoolExhaustionCooldown(waitMs: number, now = Date.now()): number { + const bounded = Math.max(POOL_EXHAUSTION_COOLDOWN_MS, Math.floor(waitMs)); + const nextExpiry = now + bounded; + poolExhaustionCooldownUntil = Math.max( + poolExhaustionCooldownUntil ?? 
0, + nextExpiry, + ); + return poolExhaustionCooldownUntil; +} + +export function clearPoolExhaustionCooldown(): void { + poolExhaustionCooldownUntil = null; +} + +export function getServerBurstCooldownRemaining(now = Date.now()): number { + if (!serverBurstState.cooldownUntil || serverBurstState.cooldownUntil <= now) { + return 0; + } + return serverBurstState.cooldownUntil - now; +} + +export function recordServerBurstFailure( + accountIndex: number, + now = Date.now(), +): number { + if (serverBurstState.cooldownUntil && serverBurstState.cooldownUntil > now) { + return serverBurstState.cooldownUntil; + } + if ( + (serverBurstState.cooldownUntil === null || + serverBurstState.cooldownUntil <= now) && + now - serverBurstState.windowStartedAt > SERVER_BURST_COOLDOWN_MS + ) { + serverBurstState = { + windowStartedAt: now, + accountIndices: new Set(), + cooldownUntil: null, + }; + } + if (!serverBurstState.windowStartedAt) { + serverBurstState.windowStartedAt = now; + } + serverBurstState.accountIndices.add(accountIndex); + if (serverBurstState.accountIndices.size >= SERVER_BURST_THRESHOLD) { + serverBurstState.cooldownUntil = now + SERVER_BURST_COOLDOWN_MS; + } + return serverBurstState.cooldownUntil ?? 0; +} + +export function clearServerBurstCooldown(): void { + serverBurstState = { + windowStartedAt: 0, + accountIndices: new Set(), + cooldownUntil: null, + }; +} + +export function buildAdaptiveStreamFailoverCandidateOrder( + primaryIndex: number, + accounts: Array>, + now = Date.now(), +): number[] { + const primary = accounts.find((account) => account.index === primaryIndex); + const alternates = accounts + .filter((account) => account.index !== primaryIndex && account.enabled !== false) + .filter((account) => { + const coolingDownUntil = account.coolingDownUntil ?? 0; + if (coolingDownUntil > now) return false; + const rateLimitValues = Object.values(account.rateLimitResetTimes ?? 
{}); + return !rateLimitValues.some( + (value) => typeof value === "number" && value > now, + ); + }) + .sort((left, right) => (right.lastUsed ?? 0) - (left.lastUsed ?? 0)) + .slice(0, 1) + .map((account) => account.index); + return [primary?.index ?? primaryIndex, ...alternates]; +} + +export function resetRequestResilienceStateForTests(): void { + clearPoolExhaustionCooldown(); + clearServerBurstCooldown(); +} diff --git a/lib/request/response-metadata.ts b/lib/request/response-metadata.ts index d4fd7cc5..f6abac63 100644 --- a/lib/request/response-metadata.ts +++ b/lib/request/response-metadata.ts @@ -1,4 +1,4 @@ -const MAX_RETRY_HINT_MS = 5 * 60 * 1000; +const MAX_RETRY_HINT_MS = 24 * 60 * 60 * 1000; function clampRetryHintMs(value: number): number | null { if (!Number.isFinite(value)) return null; diff --git a/lib/request/stream-failover.ts b/lib/request/stream-failover.ts index 0dced4e1..2660eea6 100644 --- a/lib/request/stream-failover.ts +++ b/lib/request/stream-failover.ts @@ -105,7 +105,11 @@ async function readChunkWithSoftHardTimeout( * * The returned Response streams bytes from the initialResponse body and, when the stream stalls or errors, will attempt up to `maxFailovers` failovers by calling `getFallbackResponse(attempt, emittedBytes)`. On each successful failover a textual marker is injected into the stream identifying the failover attempt (and `requestInstanceId` when provided). The function performs best-effort cleanup of underlying readers and enforces soft/hard read timeouts as configured via `options`. * - * Concurrency assumptions: the implementation expects a single consumer reading the returned Response body; callers must not concurrently read the same stream body from multiple consumers. Filesystem/platform note: behavior is platform-agnostic; no filesystem access is performed (Windows-specific filesystem semantics do not apply). 
Token redaction: any request identifiers embedded in the injected marker are limited to the normalized `requestInstanceId` (trimmed and truncated to 64 chars) to avoid leaking long tokens. + * Concurrency assumptions: the implementation expects a single consumer reading the returned Response body; callers must not concurrently read the same stream body from multiple consumers. Filesystem/platform note: behavior is platform-agnostic; no filesystem access is performed (Windows-specific filesystem semantics do not apply). Token redaction: any request identifiers embedded in the injected marker are limited to the normalized `requestInstanceId` (trimmed and truncated to 64 chars) to avoid leaking long tokens. + * + * Failover safety: once the wrapper has already emitted bytes from the primary stream, + * it will stop attempting fallback replays. Reissuing the upstream request after partial + * output can duplicate streamed text and any side-effectful tool activity. * * @param initialResponse - The original Response whose body will be streamed and monitored for stalls/errors. * @param getFallbackResponse - Async function invoked for each failover attempt with the 1-based attempt number and total emitted bytes; should return a Response with a streaming body to switch to, or `null`/a Response without a body to indicate no fallback. 
@@ -162,6 +166,9 @@ export function withStreamingFailover( if (failoverAttempt >= maxFailovers) { return false; } + if (emittedBytes > 0) { + return false; + } failoverAttempt += 1; const fallback = await getFallbackResponse(failoverAttempt, emittedBytes); if (!fallback?.body) { diff --git a/lib/runtime/live-sync-entry.ts b/lib/runtime/live-sync-entry.ts index fb368d09..40f1fdf3 100644 --- a/lib/runtime/live-sync-entry.ts +++ b/lib/runtime/live-sync-entry.ts @@ -12,10 +12,17 @@ export async function ensureLiveAccountSyncEntry< authFallback?: OAuthAuthDetails; currentSync: TSync | null; currentPath: string | null; + currentConfigKey?: string | null; getLiveAccountSync: ( config: ReturnType, ) => boolean; getStoragePath: () => string; + getLiveAccountSyncDebounceMs: ( + config: ReturnType, + ) => number; + getLiveAccountSyncPollMs: ( + config: ReturnType, + ) => number; createSync: (authFallback?: OAuthAuthDetails) => TSync; registerCleanup: (cleanup: () => void) => void; logWarn: (message: string) => void; @@ -25,6 +32,8 @@ export async function ensureLiveAccountSyncEntry< targetPath: string; currentSync: TSync | null; currentPath: string | null; + currentConfigKey?: string | null; + configKey?: string | null; authFallback?: OAuthAuthDetails; createSync: (authFallback?: OAuthAuthDetails) => TSync; registerCleanup: (cleanup: () => void) => void; @@ -33,16 +42,22 @@ export async function ensureLiveAccountSyncEntry< }) => Promise<{ liveAccountSync: TSync | null; liveAccountSyncPath: string | null; + liveAccountSyncConfigKey: string | null; }>; }): Promise<{ liveAccountSync: TSync | null; liveAccountSyncPath: string | null; + liveAccountSyncConfigKey: string | null; }> { + const debounceMs = params.getLiveAccountSyncDebounceMs(params.pluginConfig); + const pollIntervalMs = params.getLiveAccountSyncPollMs(params.pluginConfig); return params.ensureLiveAccountSyncState({ enabled: params.getLiveAccountSync(params.pluginConfig), targetPath: params.getStoragePath(), 
currentSync: params.currentSync, currentPath: params.currentPath, + currentConfigKey: params.currentConfigKey, + configKey: `${debounceMs}:${pollIntervalMs}`, authFallback: params.authFallback, createSync: params.createSync, registerCleanup: params.registerCleanup, diff --git a/lib/runtime/live-sync.ts b/lib/runtime/live-sync.ts index c7859330..af9507fc 100644 --- a/lib/runtime/live-sync.ts +++ b/lib/runtime/live-sync.ts @@ -15,6 +15,7 @@ export async function ensureRuntimeLiveAccountSync< getStoragePath: () => string; currentSync: TSync | null; currentPath: string | null; + currentConfigKey?: string | null; currentCleanupRegistered: boolean; getCurrentSync: () => TSync | null; createSync: ( @@ -29,6 +30,7 @@ export async function ensureRuntimeLiveAccountSync< commitState: (state: { sync: TSync | null; path: string | null; + configKey: string | null; cleanupRegistered: boolean; }) => void; registerCleanup: (cleanup: () => void) => void; @@ -37,18 +39,24 @@ export async function ensureRuntimeLiveAccountSync< }): Promise<{ sync: TSync | null; path: string | null; + configKey: string | null; cleanupRegistered: boolean; }> { + const debounceMs = deps.getLiveAccountSyncDebounceMs(deps.pluginConfig); + const pollIntervalMs = deps.getLiveAccountSyncPollMs(deps.pluginConfig); + const nextConfigKey = `${debounceMs}:${pollIntervalMs}`; if (!deps.getLiveAccountSync(deps.pluginConfig)) { deps.currentSync?.stop(); deps.commitState({ sync: null, path: null, + configKey: null, cleanupRegistered: deps.currentCleanupRegistered, }); return { sync: null, path: null, + configKey: null, cleanupRegistered: deps.currentCleanupRegistered, }; } @@ -57,10 +65,18 @@ export async function ensureRuntimeLiveAccountSync< let sync = deps.currentSync; let cleanupRegistered = deps.currentCleanupRegistered; let nextPath = deps.currentPath; + let configKey = deps.currentConfigKey ?? 
null; + if (sync && configKey !== null && configKey !== nextConfigKey) { + sync.stop(); + sync = null; + nextPath = null; + configKey = null; + } const commitState = (): void => { deps.commitState({ sync, path: nextPath, + configKey, cleanupRegistered, }); }; @@ -70,10 +86,11 @@ export async function ensureRuntimeLiveAccountSync< await deps.reloadAccountManagerFromDisk(deps.authFallback); }, { - debounceMs: deps.getLiveAccountSyncDebounceMs(deps.pluginConfig), - pollIntervalMs: deps.getLiveAccountSyncPollMs(deps.pluginConfig), + debounceMs, + pollIntervalMs, }, ); + configKey = nextConfigKey; commitState(); if (!cleanupRegistered) { deps.registerCleanup(() => { @@ -106,5 +123,5 @@ export async function ensureRuntimeLiveAccountSync< } } - return { sync, path: nextPath, cleanupRegistered }; + return { sync, path: nextPath, configKey, cleanupRegistered }; } diff --git a/lib/runtime/metrics.ts b/lib/runtime/metrics.ts deleted file mode 100644 index 49e95eb3..00000000 --- a/lib/runtime/metrics.ts +++ /dev/null @@ -1,127 +0,0 @@ -import type { FailoverMode } from "../request/failure-policy.js"; - -export const MAX_RETRY_HINT_MS = 5 * 60 * 1000; - -export type RuntimeMetrics = { - startedAt: number; - totalRequests: number; - successfulRequests: number; - failedRequests: number; - rateLimitedResponses: number; - serverErrors: number; - networkErrors: number; - userAborts: number; - authRefreshFailures: number; - emptyResponseRetries: number; - accountRotations: number; - sameAccountRetries: number; - streamFailoverAttempts: number; - streamFailoverRecoveries: number; - streamFailoverCrossAccountRecoveries: number; - cumulativeLatencyMs: number; - lastRequestAt: number | null; - lastError: string | null; -}; - -export function createRuntimeMetrics(now = Date.now()): RuntimeMetrics { - return { - startedAt: now, - totalRequests: 0, - successfulRequests: 0, - failedRequests: 0, - rateLimitedResponses: 0, - serverErrors: 0, - networkErrors: 0, - userAborts: 0, - 
authRefreshFailures: 0, - emptyResponseRetries: 0, - accountRotations: 0, - sameAccountRetries: 0, - streamFailoverAttempts: 0, - streamFailoverRecoveries: 0, - streamFailoverCrossAccountRecoveries: 0, - cumulativeLatencyMs: 0, - lastRequestAt: null, - lastError: null, - }; -} - -export function parseFailoverMode(value: string | undefined): FailoverMode { - const normalized = (value ?? "").trim().toLowerCase(); - if (normalized === "aggressive") return "aggressive"; - if (normalized === "conservative") return "conservative"; - return "balanced"; -} - -export function parseEnvInt(value: string | undefined): number | undefined { - if (value === undefined) return undefined; - const parsed = Number.parseInt(value, 10); - return Number.isFinite(parsed) ? parsed : undefined; -} - -export function clampRetryHintMs(value: number): number | null { - if (!Number.isFinite(value)) return null; - const normalized = Math.floor(value); - if (normalized <= 0) return null; - return Math.min(normalized, MAX_RETRY_HINT_MS); -} - -export function parseRetryAfterHintMs( - headers: Headers, - now = Date.now(), -): number | null { - const retryAfterMsHeader = headers.get("retry-after-ms")?.trim(); - if (retryAfterMsHeader && /^\d+$/.test(retryAfterMsHeader)) { - return clampRetryHintMs(Number.parseInt(retryAfterMsHeader, 10)); - } - - const retryAfterHeader = headers.get("retry-after")?.trim(); - if (retryAfterHeader && /^\d+$/.test(retryAfterHeader)) { - return clampRetryHintMs(Number.parseInt(retryAfterHeader, 10) * 1000); - } - if (retryAfterHeader) { - const retryAtMs = Date.parse(retryAfterHeader); - if (Number.isFinite(retryAtMs)) { - return clampRetryHintMs(retryAtMs - now); - } - } - - const resetAtHeader = headers.get("x-ratelimit-reset")?.trim(); - if (resetAtHeader && /^\d+$/.test(resetAtHeader)) { - const resetRaw = Number.parseInt(resetAtHeader, 10); - const resetAtMs = resetRaw < 10_000_000_000 ? 
resetRaw * 1000 : resetRaw; - return clampRetryHintMs(resetAtMs - now); - } - - return null; -} - -export function sanitizeResponseHeadersForLog( - headers: Headers, -): Record { - const allowed = new Set([ - "content-type", - "x-request-id", - "x-openai-request-id", - "x-codex-plan-type", - "x-codex-active-limit", - "x-codex-primary-used-percent", - "x-codex-primary-window-minutes", - "x-codex-primary-reset-at", - "x-codex-primary-reset-after-seconds", - "x-codex-secondary-used-percent", - "x-codex-secondary-window-minutes", - "x-codex-secondary-reset-at", - "x-codex-secondary-reset-after-seconds", - "retry-after", - "x-ratelimit-reset", - "x-ratelimit-reset-requests", - ]); - const sanitized: Record = {}; - for (const [rawName, rawValue] of headers.entries()) { - const name = rawName.toLowerCase(); - if (!allowed.has(name)) continue; - sanitized[name] = rawValue; - } - return sanitized; -} diff --git a/lib/runtime/runtime-observability.ts b/lib/runtime/runtime-observability.ts new file mode 100644 index 00000000..09599f25 --- /dev/null +++ b/lib/runtime/runtime-observability.ts @@ -0,0 +1,222 @@ +import { existsSync, readFileSync, promises as fs } from "node:fs"; +import { join } from "node:path"; +import { getCodexMultiAuthDir } from "../runtime-paths.js"; + +export interface RuntimeMetricsSnapshot { + startedAt: number; + totalRequests: number; + successfulRequests: number; + failedRequests: number; + responsesRequests: number; + authRefreshRequests: number; + diagnosticProbeRequests: number; + outboundRequestAttemptBudget: number | null; + outboundRequestAttemptsConsumed: number; + requestAttemptBudgetExhaustions: number; + poolExhaustionFastFails: number; + serverBurstFastFails: number; + rateLimitedResponses: number; + serverErrors: number; + networkErrors: number; + userAborts: number; + authRefreshFailures: number; + emptyResponseRetries: number; + accountRotations: number; + sameAccountRetries: number; + streamFailoverAttempts: number; + 
streamFailoverCandidatesConsidered: number; + lastStreamFailoverCandidateCount: number; + streamFailoverRecoveries: number; + streamFailoverCrossAccountRecoveries: number; + cumulativeLatencyMs: number; + lastRequestAt: number | null; + lastError: string | null; +} + +export interface RuntimeObservabilitySnapshot { + version: number; + updatedAt: number; + currentRequestId: string | null; + responsesRequests: number; + authRefreshRequests: number; + diagnosticProbeRequests: number; + poolExhaustionCooldownUntil: number | null; + serverBurstCooldownUntil: number | null; + runtimeMetrics: RuntimeMetricsSnapshot; +} + +const SNAPSHOT_FILE_NAME = "runtime-observability.json"; +const PERSIST_RUNTIME_SNAPSHOT = process.env.VITEST !== "true"; +const RUNTIME_OBSERVABILITY_SNAPSHOT_VERSION = 1; +const RETRYABLE_SNAPSHOT_ERRORS = new Set(["EBUSY", "EPERM"]); + +let snapshotState: RuntimeObservabilitySnapshot | null = null; +let pendingWrite: Promise | null = null; + +function getSnapshotPath(): string { + return join(getCodexMultiAuthDir(), SNAPSHOT_FILE_NAME); +} + +function createDefaultSnapshot(): RuntimeObservabilitySnapshot { + return { + version: RUNTIME_OBSERVABILITY_SNAPSHOT_VERSION, + updatedAt: 0, + currentRequestId: null, + responsesRequests: 0, + authRefreshRequests: 0, + diagnosticProbeRequests: 0, + poolExhaustionCooldownUntil: null, + serverBurstCooldownUntil: null, + runtimeMetrics: { + startedAt: 0, + totalRequests: 0, + successfulRequests: 0, + failedRequests: 0, + responsesRequests: 0, + authRefreshRequests: 0, + diagnosticProbeRequests: 0, + outboundRequestAttemptBudget: null, + outboundRequestAttemptsConsumed: 0, + requestAttemptBudgetExhaustions: 0, + poolExhaustionFastFails: 0, + serverBurstFastFails: 0, + rateLimitedResponses: 0, + serverErrors: 0, + networkErrors: 0, + userAborts: 0, + authRefreshFailures: 0, + emptyResponseRetries: 0, + accountRotations: 0, + sameAccountRetries: 0, + streamFailoverAttempts: 0, + streamFailoverCandidatesConsidered: 
0, + lastStreamFailoverCandidateCount: 0, + streamFailoverRecoveries: 0, + streamFailoverCrossAccountRecoveries: 0, + cumulativeLatencyMs: 0, + lastRequestAt: null, + lastError: null, + }, + }; +} + +function normalizePersistedSnapshot( + parsed: Partial | null, +): RuntimeObservabilitySnapshot | null { + if (!parsed || typeof parsed !== "object") { + return null; + } + if ( + typeof parsed.version === "number" && + parsed.version !== RUNTIME_OBSERVABILITY_SNAPSHOT_VERSION + ) { + return null; + } + const base = createDefaultSnapshot(); + return { + ...base, + ...parsed, + version: RUNTIME_OBSERVABILITY_SNAPSHOT_VERSION, + runtimeMetrics: { + ...base.runtimeMetrics, + ...(parsed.runtimeMetrics ?? {}), + }, + }; +} + +function loadPersistedRuntimeObservabilitySnapshotSync(): RuntimeObservabilitySnapshot | null { + const path = getSnapshotPath(); + if (!existsSync(path)) { + return null; + } + try { + const raw = readFileSync(path, "utf-8"); + const parsed = JSON.parse(raw) as Partial | null; + return normalizePersistedSnapshot(parsed); + } catch { + return null; + } +} + +function ensureSnapshotState(): RuntimeObservabilitySnapshot { + if (!snapshotState) { + snapshotState = + (PERSIST_RUNTIME_SNAPSHOT + ? loadPersistedRuntimeObservabilitySnapshotSync() + : null) ?? createDefaultSnapshot(); + } + return snapshotState; +} + +async function writeSnapshot(snapshot: RuntimeObservabilitySnapshot): Promise { + const dir = getCodexMultiAuthDir(); + const path = getSnapshotPath(); + await fs.mkdir(dir, { recursive: true }); + let lastError: unknown = null; + for (let attempt = 0; attempt < 3; attempt += 1) { + const tempPath = `${path}.${process.pid}.${Date.now()}.${attempt}.tmp`; + let moved = false; + try { + await fs.writeFile(tempPath, JSON.stringify(snapshot, null, 2), "utf-8"); + await fs.rename(tempPath, path); + moved = true; + return; + } catch (error) { + lastError = error; + const code = (error as NodeJS.ErrnoException | undefined)?.code ?? 
""; + if (!RETRYABLE_SNAPSHOT_ERRORS.has(code) || attempt >= 2) { + throw error; + } + } finally { + if (!moved) { + try { + await fs.unlink(tempPath); + } catch { + // Best-effort cleanup for interrupted writes. + } + } + } + } + if (lastError) { + throw lastError; + } +} + +export function getRuntimeObservabilitySnapshot(): RuntimeObservabilitySnapshot { + return structuredClone(ensureSnapshotState()); +} + +export function mutateRuntimeObservabilitySnapshot( + mutator: (snapshot: RuntimeObservabilitySnapshot) => void, +): void { + const snapshot = ensureSnapshotState(); + mutator(snapshot); + snapshot.updatedAt = Date.now(); + if (!PERSIST_RUNTIME_SNAPSHOT) { + return; + } + const nextSnapshot = structuredClone(snapshot); + pendingWrite = (pendingWrite ?? Promise.resolve()) + .catch(() => undefined) + .then(() => writeSnapshot(nextSnapshot)) + .catch(() => undefined); +} + +export async function loadPersistedRuntimeObservabilitySnapshot(): Promise { + const path = getSnapshotPath(); + if (!existsSync(path)) { + return null; + } + try { + const raw = await fs.readFile(path, "utf-8"); + const parsed = JSON.parse(raw) as Partial | null; + return normalizePersistedSnapshot(parsed); + } catch { + return null; + } +} + +export function resetRuntimeObservabilitySnapshotForTests(): void { + snapshotState = createDefaultSnapshot(); + pendingWrite = null; +} diff --git a/lib/runtime/runtime-services.ts b/lib/runtime/runtime-services.ts index 7bd52b1a..d64de399 100644 --- a/lib/runtime/runtime-services.ts +++ b/lib/runtime/runtime-services.ts @@ -20,6 +20,8 @@ export async function ensureLiveAccountSyncState< targetPath: string; currentSync: TSync | null; currentPath: string | null; + currentConfigKey?: string | null; + configKey?: string | null; authFallback?: OAuthAuthDetails; createSync: (authFallback?: OAuthAuthDetails) => TSync; registerCleanup: (cleanup: () => void) => void; @@ -28,21 +30,37 @@ export async function ensureLiveAccountSyncState< }): Promise<{ 
liveAccountSync: TSync | null; liveAccountSyncPath: string | null; + liveAccountSyncConfigKey: string | null; }> { let liveAccountSync = params.currentSync; let liveAccountSyncPath = params.currentPath; + let liveAccountSyncConfigKey = params.currentConfigKey ?? null; if (!params.enabled) { - if (liveAccountSync) { - liveAccountSync.stop(); - liveAccountSync = null; - liveAccountSyncPath = null; - } - return { liveAccountSync, liveAccountSyncPath }; + liveAccountSync?.stop(); + return { + liveAccountSync: null, + liveAccountSyncPath: null, + liveAccountSyncConfigKey: null, + }; + } + + const nextConfigKey = params.configKey ?? null; + if ( + liveAccountSync && + nextConfigKey !== null && + (liveAccountSyncConfigKey === null || + liveAccountSyncConfigKey !== nextConfigKey) + ) { + liveAccountSync.stop(); + liveAccountSync = null; + liveAccountSyncPath = null; + liveAccountSyncConfigKey = null; } if (!liveAccountSync) { liveAccountSync = params.createSync(params.authFallback); + liveAccountSyncConfigKey = nextConfigKey; params.registerCleanup(() => { liveAccountSync?.stop(); }); @@ -71,7 +89,7 @@ export async function ensureLiveAccountSyncState< } } - return { liveAccountSync, liveAccountSyncPath }; + return { liveAccountSync, liveAccountSyncPath, liveAccountSyncConfigKey }; } export function ensureRefreshGuardianState< diff --git a/lib/schemas.ts b/lib/schemas.ts index 89c13669..ae888959 100644 --- a/lib/schemas.ts +++ b/lib/schemas.ts @@ -38,6 +38,10 @@ export const PluginConfigSchema = z.object({ parallelProbingMaxConcurrency: z.number().min(1).max(5).optional(), emptyResponseMaxRetries: z.number().min(0).optional(), emptyResponseRetryDelayMs: z.number().min(0).optional(), + rateLimitDedupWindowMs: z.number().min(0).optional(), + rateLimitStateResetMs: z.number().min(1_000).optional(), + rateLimitMaxBackoffMs: z.number().min(1_000).optional(), + rateLimitShortRetryThresholdMs: z.number().min(0).optional(), pidOffsetEnabled: z.boolean().optional(), fetchTimeoutMs: 
z.number().min(1_000).optional(), streamStallTimeoutMs: z.number().min(1_000).optional(), @@ -77,7 +81,7 @@ export type AccountIdSourceFromSchema = z.infer; /** * Cooldown reason for temporary account suspension. */ -export const CooldownReasonSchema = z.enum(["auth-failure", "network-error", "rate-limit"]); +export const CooldownReasonSchema = z.enum(["auth-failure", "network-error", "server-error", "rate-limit"]); export type CooldownReasonFromSchema = z.infer; diff --git a/lib/session-affinity.ts b/lib/session-affinity.ts index 1ce27e30..a8909208 100644 --- a/lib/session-affinity.ts +++ b/lib/session-affinity.ts @@ -12,6 +12,7 @@ interface SessionAffinityEntry { expiresAt: number; lastResponseId?: string; updatedAt: number; + writeVersion: number; } const DEFAULT_TTL_MS = 20 * 60 * 1000; @@ -42,6 +43,7 @@ export class SessionAffinityStore { private readonly ttlMs: number; private readonly maxEntries: number; private readonly entries = new Map(); + private writeVersionCounter = 0; constructor(options: SessionAffinityOptions = {}) { this.ttlMs = Math.max(1_000, Math.floor(options.ttlMs ?? 
DEFAULT_TTL_MS)); @@ -62,17 +64,35 @@ export class SessionAffinityStore { } remember(sessionKey: string | null | undefined, accountIndex: number, now = Date.now()): void { + this.rememberWithVersion(sessionKey, accountIndex, now); + } + + rememberWithVersion( + sessionKey: string | null | undefined, + accountIndex: number, + now = Date.now(), + writeVersion?: number, + ): void { const key = normalizeSessionKey(sessionKey); if (!key) return; if (!Number.isFinite(accountIndex) || accountIndex < 0) return; + const normalizedWriteVersion = this.normalizeWriteVersion(writeVersion); const existingEntry = this.entries.get(key); + if ( + existingEntry && + existingEntry.expiresAt > now && + existingEntry.writeVersion > normalizedWriteVersion + ) { + return; + } this.setEntry(key, { accountIndex, expiresAt: now + this.ttlMs, lastResponseId: existingEntry?.lastResponseId, updatedAt: now, + writeVersion: normalizedWriteVersion, }); } @@ -110,10 +130,12 @@ export class SessionAffinityStore { sessionKey: string | null | undefined, responseId: string | null | undefined, now = Date.now(), + writeVersion?: number, ): void { const key = normalizeSessionKey(sessionKey); const normalizedResponseId = typeof responseId === "string" ? 
responseId.trim() : ""; if (!key || !normalizedResponseId) return; + const normalizedWriteVersion = this.normalizeWriteVersion(writeVersion); const entry = this.entries.get(key); if (!entry) return; @@ -121,12 +143,16 @@ export class SessionAffinityStore { this.entries.delete(key); return; } + if (entry.writeVersion > normalizedWriteVersion) { + return; + } this.setEntry(key, { ...entry, expiresAt: now + this.ttlMs, lastResponseId: normalizedResponseId, updatedAt: now, + writeVersion: normalizedWriteVersion, }); } @@ -203,4 +229,14 @@ export class SessionAffinityStore { return oldestKey; } + + private normalizeWriteVersion(writeVersion?: number): number { + if (typeof writeVersion === "number" && Number.isFinite(writeVersion)) { + const normalized = Math.max(0, Math.floor(writeVersion)); + this.writeVersionCounter = Math.max(this.writeVersionCounter, normalized); + return normalized; + } + this.writeVersionCounter += 1; + return this.writeVersionCounter; + } } diff --git a/lib/storage.ts b/lib/storage.ts index e744bfbd..2eb659dd 100644 --- a/lib/storage.ts +++ b/lib/storage.ts @@ -51,6 +51,10 @@ import { clearFlaggedAccountsEntry } from "./storage/flagged-entry.js"; import { loadFlaggedAccountsEntry } from "./storage/flagged-load-entry.js"; import { saveFlaggedAccountsEntry } from "./storage/flagged-save-entry.js"; import { normalizeFlaggedStorage } from "./storage/flagged-storage.js"; +import { + createStorageHealthSummary, + type StorageHealthSummary, +} from "./storage/health.js"; import { clearFlaggedAccountsOnDisk, loadFlaggedAccountsState, @@ -128,6 +132,7 @@ import { } from "./storage/transactions.js"; export type { + StorageHealthSummary, CooldownReason, RateLimitStateV3, AccountMetadataV1, @@ -1264,8 +1269,108 @@ export async function getRestoreAssessment(): Promise { }); } +export async function inspectStorageHealth(): Promise { + const path = getStoragePath(); + const walPath = getAccountsWalPath(path); + const resetMarkerPath = 
getIntentionalResetMarkerPath(path); + if (existsSync(resetMarkerPath)) { + return createStorageHealthSummary({ + state: "intentional-reset", + path, + walPath, + resetMarkerPath, + details: "intentional reset marker present", + }); + } + if (!existsSync(path)) { + const walRecovered = await loadAccountsFromJournal(path, { silent: true }); + if (walRecovered && walRecovered.accounts.length > 0) { + return createStorageHealthSummary({ + state: "recoverable", + path, + walPath, + resetMarkerPath, + details: "primary storage missing but WAL recovery is available", + recoverySource: "wal", + }); + } + return createStorageHealthSummary({ + state: "empty", + path, + walPath, + resetMarkerPath, + details: "storage file is missing", + }); + } + try { + const { normalized, schemaErrors } = await loadAccountsFromPath(path, { + normalizeAccountStorage, + isRecord, + }); + if (normalized && normalized.accounts.length > 0) { + return createStorageHealthSummary({ + state: "healthy", + path, + walPath, + resetMarkerPath, + schemaErrors, + }); + } + if (normalized) { + return createStorageHealthSummary({ + state: "empty", + path, + walPath, + resetMarkerPath, + schemaErrors, + details: "storage parsed but contains no accounts", + }); + } + const walRecovered = await loadAccountsFromJournal(path, { silent: true }); + if (walRecovered && walRecovered.accounts.length > 0) { + return createStorageHealthSummary({ + state: "recoverable", + path, + walPath, + resetMarkerPath, + schemaErrors, + details: "primary storage is invalid but WAL recovery is available", + recoverySource: "wal", + }); + } + return createStorageHealthSummary({ + state: "corrupt", + path, + walPath, + resetMarkerPath, + schemaErrors, + details: "storage could not be normalized", + }); + } catch (error) { + const walRecovered = await loadAccountsFromJournal(path, { silent: true }); + if (walRecovered && walRecovered.accounts.length > 0) { + return createStorageHealthSummary({ + state: "recoverable", + path, + 
walPath, + resetMarkerPath, + details: error instanceof Error ? error.message : String(error), + recoverySource: "wal", + }); + } + return createStorageHealthSummary({ + state: "corrupt", + path, + walPath, + resetMarkerPath, + details: error instanceof Error ? error.message : String(error), + }); + } +} + async function loadAccountsFromJournal( path: string, + options: { silent?: boolean } = {}, ): Promise { const walPath = getAccountsWalPath(path); const resetMarkerPath = getIntentionalResetMarkerPath(path); @@ -1285,7 +1390,9 @@ async function loadAccountsFromJournal( return null; const computed = computeSha256(entry.content); if (computed !== entry.checksum) { - log.warn("Account journal checksum mismatch", { path: walPath }); + if (!options.silent) { + log.warn("Account journal checksum mismatch", { path: walPath }); + } return null; } const data = JSON.parse(entry.content) as unknown; @@ -1295,11 +1402,13 @@ async function loadAccountsFromJournal( isRecord, ); if (!normalized) return null; - log.warn("Recovered account storage from WAL journal", { path, walPath }); + if (!options.silent) { + log.warn("Recovered account storage from WAL journal", { path, walPath }); + } return normalized; } catch (error) { const code = (error as NodeJS.ErrnoException).code; - if (code !== "ENOENT") { + if (code !== "ENOENT" && !options.silent) { log.warn("Failed to load account WAL journal", { path: walPath, error: String(error), diff --git a/lib/storage/account-clear.ts b/lib/storage/account-clear.ts index 5f428cd3..25432ec9 100644 --- a/lib/storage/account-clear.ts +++ b/lib/storage/account-clear.ts @@ -1,5 +1,14 @@ import { promises as fs } from "node:fs"; +function isRetryableFsError(error: unknown): boolean { + const code = (error as NodeJS.ErrnoException | undefined)?.code; + return code === "EBUSY" || code === "EPERM"; +} + +async function sleep(ms: number): Promise { + await new Promise((resolve) => setTimeout(resolve, ms)); +} + export async function 
clearAccountStorageArtifacts(params: { path: string; resetMarkerPath: string; @@ -7,32 +16,57 @@ export async function clearAccountStorageArtifacts(params: { backupPaths: string[]; logError: (message: string, details: Record) => void; }): Promise { - await fs.writeFile( - params.resetMarkerPath, - JSON.stringify({ version: 1, createdAt: Date.now() }), - { encoding: "utf-8", mode: 0o600 }, - ); - const clearPath = async (targetPath: string): Promise => { + const clearPath = async ( + targetPath: string, + required: boolean, + ): Promise => { try { - await fs.unlink(targetPath); + for (let attempt = 0; attempt < 5; attempt += 1) { + try { + await fs.unlink(targetPath); + return; + } catch (error) { + const code = (error as NodeJS.ErrnoException).code; + if (code === "ENOENT") { + return; + } + if (!isRetryableFsError(error) || attempt >= 4) { + throw error; + } + await sleep(10 * 2 ** attempt); + } + } } catch (error) { const code = (error as NodeJS.ErrnoException).code; - if (code !== "ENOENT") { + if (code === "ENOENT") { + return; + } + if (required) { params.logError("Failed to clear account storage artifact", { path: targetPath, error: String(error), }); + throw error; } + params.logError("Failed to clear account storage artifact", { + path: targetPath, + error: String(error), + }); } }; - try { - await Promise.all([ - clearPath(params.path), - clearPath(params.walPath), - ...params.backupPaths.map(clearPath), - ]); - } catch { - // Individual path cleanup is already best-effort with per-artifact logging. + await clearPath(params.path, true); + await clearPath(params.walPath, true); + await fs.writeFile( + params.resetMarkerPath, + JSON.stringify({ version: 1, createdAt: Date.now() }), + { encoding: "utf-8", mode: 0o600 }, + ); + for (const backupPath of params.backupPaths) { + try { + await clearPath(backupPath, false); + } catch { + // Non-critical artifacts are already logged best-effort. 
+ } } } diff --git a/lib/storage/health.ts b/lib/storage/health.ts new file mode 100644 index 00000000..5ca2a4a6 --- /dev/null +++ b/lib/storage/health.ts @@ -0,0 +1,42 @@ +import { existsSync } from "node:fs"; + +export type StorageHealthState = + | "healthy" + | "empty" + | "intentional-reset" + | "corrupt" + | "recoverable"; + +export interface StorageHealthSummary { + state: StorageHealthState; + path: string; + resetMarkerPath: string; + walPath: string; + hasResetMarker: boolean; + hasWal: boolean; + details?: string; + schemaErrors?: string[]; + recoverySource?: "wal"; +} + +export function createStorageHealthSummary(params: { + state: StorageHealthState; + path: string; + resetMarkerPath: string; + walPath: string; + details?: string; + schemaErrors?: string[]; + recoverySource?: "wal"; +}): StorageHealthSummary { + return { + state: params.state, + path: params.path, + resetMarkerPath: params.resetMarkerPath, + walPath: params.walPath, + hasResetMarker: existsSync(params.resetMarkerPath), + hasWal: existsSync(params.walPath), + details: params.details, + schemaErrors: params.schemaErrors, + recoverySource: params.recoverySource, + }; +} diff --git a/lib/storage/migrations.ts b/lib/storage/migrations.ts index d086ca83..70a92d2a 100644 --- a/lib/storage/migrations.ts +++ b/lib/storage/migrations.ts @@ -7,7 +7,7 @@ import { MODEL_FAMILIES, type ModelFamily } from "../prompts/codex.js"; import type { AccountIdSource } from "../types.js"; import type { Workspace } from "../accounts.js"; -export type CooldownReason = "auth-failure" | "network-error" | "rate-limit"; +export type CooldownReason = "auth-failure" | "network-error" | "server-error" | "rate-limit"; export interface RateLimitStateV3 { [key: string]: number | undefined; diff --git a/test/account-clear.test.ts b/test/account-clear.test.ts index d2edfc8a..428b94b2 100644 --- a/test/account-clear.test.ts +++ b/test/account-clear.test.ts @@ -1,7 +1,13 @@ -import { describe, expect, it, vi } from "vitest"; 
+import { promises as fs } from "node:fs"; +import { afterEach, describe, expect, it, vi } from "vitest"; import { clearAccountStorageArtifacts } from "../lib/storage/account-clear.js"; describe("account clear helper", () => { + afterEach(() => { + vi.restoreAllMocks(); + vi.useRealTimers(); + }); + it("clears primary, wal, and backups after writing marker", async () => { await expect( clearAccountStorageArtifacts({ @@ -13,4 +19,34 @@ describe("account clear helper", () => { }), ).resolves.toBeUndefined(); }); + + it.each(["EBUSY", "EPERM"] as const)( + "retries transient %s errors when clearing required artifacts", + async (code) => { + vi.useFakeTimers(); + const unlinkSpy = vi.spyOn(fs, "unlink"); + let attempts = 0; + unlinkSpy.mockImplementation(async (targetPath) => { + if (String(targetPath).endsWith("tmp-accounts.json") && attempts < 2) { + attempts += 1; + const error = new Error(code) as NodeJS.ErrnoException; + error.code = code; + throw error; + } + return undefined as never; + }); + + const clearPromise = clearAccountStorageArtifacts({ + path: `${process.cwd()}/tmp-accounts.json`, + resetMarkerPath: `${process.cwd()}/tmp-accounts.marker`, + walPath: `${process.cwd()}/tmp-accounts.wal`, + backupPaths: [], + logError: vi.fn(), + }); + + await vi.runAllTimersAsync(); + await expect(clearPromise).resolves.toBeUndefined(); + expect(unlinkSpy).toHaveBeenCalled(); + }, + ); }); diff --git a/test/accounts.test.ts b/test/accounts.test.ts index 9b2367d1..4124bc01 100644 --- a/test/accounts.test.ts +++ b/test/accounts.test.ts @@ -427,6 +427,55 @@ describe("AccountManager", () => { expect(manager.isAccountAvailableForFamily(3, "codex")).toBe(true); }); + it("treats accounts with all tracked workspaces disabled as unavailable for selection", () => { + const now = Date.now(); + const stored = { + version: 3 as const, + activeIndex: 0, + activeIndexByFamily: { codex: 0 }, + accounts: [ + { + refreshToken: "token-workspace-disabled", + addedAt: now, + lastUsed: now, + 
workspaces: [ + { id: "workspace-1", name: "Workspace 1", enabled: false }, + { id: "workspace-2", name: "Workspace 2", enabled: false }, + ], + currentWorkspaceIndex: 0, + }, + { + refreshToken: "token-ready", + addedAt: now, + lastUsed: now - 10_000, + }, + ], + }; + + const manager = new AccountManager(undefined, stored as never); + + expect(manager.isAccountAvailableForFamily(0, "codex")).toBe(false); + expect(manager.getCurrentOrNextForFamily("codex")?.refreshToken).toBe("token-ready"); + expect(manager.getNextForFamily("codex")?.refreshToken).toBe("token-ready"); + expect(manager.getCurrentOrNextForFamilyHybrid("codex")?.refreshToken).toBe("token-ready"); + }); + + it("keeps workspace-less legacy accounts eligible for selection", () => { + const now = Date.now(); + const stored = { + version: 3 as const, + activeIndex: 0, + activeIndexByFamily: { codex: 0 }, + accounts: [{ refreshToken: "token-legacy", addedAt: now, lastUsed: now }], + }; + + const manager = new AccountManager(undefined, stored as never); + + expect(manager.hasEnabledWorkspaces(manager.getAccountByIndex(0)!)).toBe(true); + expect(manager.isAccountAvailableForFamily(0, "codex")).toBe(true); + expect(manager.getCurrentOrNextForFamily("codex")?.refreshToken).toBe("token-legacy"); + }); + it("returns false for invalid account index in availability checks", () => { const now = Date.now(); const stored = { diff --git a/test/auto-update-checker.test.ts b/test/auto-update-checker.test.ts index 61572330..21f46e71 100644 --- a/test/auto-update-checker.test.ts +++ b/test/auto-update-checker.test.ts @@ -5,6 +5,8 @@ vi.mock("node:fs", () => ({ writeFileSync: vi.fn(), existsSync: vi.fn(), mkdirSync: vi.fn(), + renameSync: vi.fn(), + unlinkSync: vi.fn(), })); describe("auto-update-checker", () => { @@ -12,6 +14,11 @@ describe("auto-update-checker", () => { let checkForUpdates: typeof import("../lib/auto-update-checker.js").checkForUpdates; let checkAndNotify: typeof 
import("../lib/auto-update-checker.js").checkAndNotify; let clearUpdateCache: typeof import("../lib/auto-update-checker.js").clearUpdateCache; + let logger: { + debug: ReturnType; + info: ReturnType; + warn: ReturnType; + }; const mockPackageJson = { version: "4.12.0" }; @@ -20,6 +27,14 @@ describe("auto-update-checker", () => { vi.useFakeTimers(); vi.setSystemTime(new Date("2026-01-30T12:00:00Z")); mockPackageJson.version = "4.12.0"; + logger = { + debug: vi.fn(), + info: vi.fn(), + warn: vi.fn(), + }; + vi.doMock("../lib/logger.js", () => ({ + createLogger: () => logger, + })); fs = await import("node:fs"); vi.mocked(fs.readFileSync).mockImplementation((path: unknown) => { @@ -226,6 +241,28 @@ describe("auto-update-checker", () => { }); describe("checkForUpdates", () => { + it("logs debug details when package metadata cannot be parsed", async () => { + vi.mocked(fs.readFileSync).mockImplementation((path: unknown) => { + if (String(path).includes("package.json")) { + return "{"; + } + throw new Error("File not found"); + }); + vi.mocked(globalThis.fetch).mockResolvedValue({ + ok: true, + json: async () => ({ version: "5.0.0" }), + } as Response); + + const result = await checkForUpdates(true); + + expect(result.currentVersion).toBe("0.0.0"); + expect(globalThis.fetch).toHaveBeenCalled(); + expect(logger.debug).toHaveBeenCalledWith( + "Failed to read current package version", + expect.objectContaining({ error: expect.any(String) }), + ); + }); + it("uses cache when check is recent", async () => { const cacheData = { lastCheck: Date.now() - 1000 * 60 * 60, @@ -250,6 +287,62 @@ describe("auto-update-checker", () => { expect(result.latestVersion).toBe("5.0.0"); }); + it("logs debug details when cached update JSON is unreadable", async () => { + vi.mocked(fs.existsSync).mockReturnValue(true); + vi.mocked(fs.readFileSync).mockImplementation((path: unknown) => { + if (String(path).includes("package.json")) { + return JSON.stringify(mockPackageJson); + } + if 
(String(path).includes("update-check-cache.json")) { + return "{"; + } + throw new Error("File not found"); + }); + vi.mocked(globalThis.fetch).mockResolvedValue({ + ok: true, + json: async () => ({ version: "5.0.0" }), + } as Response); + + await checkForUpdates(); + + expect(logger.debug).toHaveBeenCalledWith( + "Failed to load update cache", + expect.objectContaining({ error: expect.any(String) }), + ); + expect(globalThis.fetch).toHaveBeenCalled(); + }); + + it.each(["EBUSY", "EPERM"] as const)( + "logs debug details when cache read fails on windows-style lock (%s)", + async (code) => { + vi.mocked(fs.existsSync).mockReturnValue(true); + vi.mocked(fs.readFileSync).mockImplementation((path: unknown) => { + if (String(path).includes("package.json")) { + return JSON.stringify(mockPackageJson); + } + if (String(path).includes("update-check-cache.json")) { + const error = new Error(`${code}: locked`) as NodeJS.ErrnoException; + error.code = code; + error.name = code; + throw error; + } + throw new Error("File not found"); + }); + vi.mocked(globalThis.fetch).mockResolvedValue({ + ok: true, + json: async () => ({ version: "5.0.0" }), + } as Response); + + await checkForUpdates(); + + expect(logger.debug).toHaveBeenCalledWith( + "Failed to load update cache", + expect.objectContaining({ error: expect.stringContaining(code) }), + ); + expect(globalThis.fetch).toHaveBeenCalled(); + }, + ); + it("handles cached null latestVersion without update", async () => { const cacheData = { lastCheck: Date.now() - 1000 * 60 * 60, @@ -358,11 +451,13 @@ describe("auto-update-checker", () => { await checkForUpdates(true); expect(fs.writeFileSync).toHaveBeenCalled(); + expect(fs.renameSync).toHaveBeenCalled(); const writeCall = vi.mocked(fs.writeFileSync).mock.calls[0]; const savedData = JSON.parse(writeCall[1] as string) as { latestVersion: string; }; expect(savedData.latestVersion).toBe("5.0.0"); + expect(String(writeCall[0])).toContain("update-check-cache.json."); }); it("creates 
cache directory if missing", async () => { @@ -383,8 +478,8 @@ describe("auto-update-checker", () => { "retries cache writes when filesystem is transiently locked (%s)", async (code) => { let attempts = 0; - vi.mocked(fs.writeFileSync).mockClear(); - vi.mocked(fs.writeFileSync).mockImplementation(() => { + vi.mocked(fs.renameSync).mockClear(); + vi.mocked(fs.renameSync).mockImplementation(() => { attempts += 1; if (attempts < 3) { const error = new Error("busy") as NodeJS.ErrnoException; @@ -400,10 +495,36 @@ describe("auto-update-checker", () => { await checkForUpdates(true); - expect(fs.writeFileSync).toHaveBeenCalledTimes(3); + expect(fs.renameSync).toHaveBeenCalledTimes(3); }, ); + it("serializes concurrent cache writes through temp-file renames", async () => { + vi.mocked(fs.writeFileSync).mockClear(); + vi.mocked(fs.renameSync).mockClear(); + vi.mocked(globalThis.fetch) + .mockResolvedValueOnce({ + ok: true, + json: async () => ({ version: "5.0.0" }), + } as Response) + .mockResolvedValueOnce({ + ok: true, + json: async () => ({ version: "5.0.1" }), + } as Response); + + await Promise.all([checkForUpdates(true), checkForUpdates(true)]); + + expect(fs.renameSync).toHaveBeenCalledTimes(2); + const writeTargets = vi + .mocked(fs.writeFileSync) + .mock.calls.map((call) => String(call[0])); + expect( + writeTargets.every((target) => + target.includes("update-check-cache.json."), + ), + ).toBe(true); + }); + it("includes updateCommand in result", async () => { vi.mocked(globalThis.fetch).mockResolvedValue({ ok: true, @@ -481,14 +602,20 @@ describe("auto-update-checker", () => { it("writes empty object when cache exists", () => { vi.mocked(fs.existsSync).mockReturnValue(true); vi.mocked(fs.writeFileSync).mockClear(); + vi.mocked(fs.renameSync).mockClear(); clearUpdateCache(); - expect(fs.writeFileSync).toHaveBeenCalledWith( - expect.stringContaining("update-check-cache.json"), - "{}", - "utf8" - ); + return Promise.resolve().then(() => { + return 
Promise.resolve().then(() => { + expect(fs.writeFileSync).toHaveBeenCalledWith( + expect.stringContaining("update-check-cache.json."), + "{}", + "utf8" + ); + expect(fs.renameSync).toHaveBeenCalled(); + }); + }); }); it("does nothing when cache does not exist", () => { @@ -499,5 +626,24 @@ describe("auto-update-checker", () => { expect(fs.writeFileSync).not.toHaveBeenCalled(); }); + + it("keeps clearUpdateCache ordered after the latest save", async () => { + vi.mocked(fs.existsSync).mockReturnValue(true); + vi.mocked(fs.writeFileSync).mockClear(); + vi.mocked(fs.renameSync).mockClear(); + vi.mocked(globalThis.fetch).mockResolvedValue({ + ok: true, + json: async () => ({ version: "5.0.0" }), + } as Response); + + await checkForUpdates(true); + clearUpdateCache(); + await Promise.resolve(); + await Promise.resolve(); + + const writes = vi.mocked(fs.writeFileSync).mock.calls; + expect(writes).toHaveLength(2); + expect(writes.at(-1)?.[1]).toBe("{}"); + }); }); }); diff --git a/test/codex-manager-cli.test.ts b/test/codex-manager-cli.test.ts index 8f30b395..f63b5a91 100644 --- a/test/codex-manager-cli.test.ts +++ b/test/codex-manager-cli.test.ts @@ -7152,6 +7152,53 @@ describe("codex manager cli commands", () => { expect(firstCallAccounts[1]?.quotaSummary).toBe("5h 90%"); }); + it("surfaces persisted account rate limits when quota cache is empty", async () => { + const now = Date.now(); + loadAccountsMock.mockResolvedValue({ + version: 3, + activeIndex: 0, + activeIndexByFamily: { codex: 0 }, + accounts: [ + { + email: "rate-limited@example.com", + accountId: "acc_rate_limited", + refreshToken: "refresh-rate-limited", + accessToken: "access-rate-limited", + expiresAt: now + 3_600_000, + addedAt: now - 1_000, + lastUsed: now - 1_000, + enabled: true, + rateLimitResetTimes: { + codex: now + 60_000, + }, + }, + ], + }); + loadDashboardDisplaySettingsMock.mockResolvedValue( + createReadyFirstMenuSettings({ menuAutoFetchLimits: false }), + ); + 
loadQuotaCacheMock.mockResolvedValue({ byAccountId: {}, byEmail: {} }); + promptLoginModeMock.mockResolvedValueOnce({ mode: "cancel" }); + + const { runCodexMultiAuthCli } = await import("../lib/codex-manager.js"); + const exitCode = await runCodexMultiAuthCli(["auth", "login"]); + + expect(exitCode).toBe(0); + const firstCallAccounts = promptLoginModeMock.mock.calls[0]?.[0] as Array<{ + email?: string; + quotaRateLimited?: boolean; + quota5hResetAtMs?: number; + quotaSummary?: string; + status?: string; + }>; + expect(firstCallAccounts[0]?.email).toBe("rate-limited@example.com"); + expect(firstCallAccounts[0]?.quotaRateLimited).toBe(true); + expect(firstCallAccounts[0]?.quota5hResetAtMs).toBe(now + 60_000); + expect(firstCallAccounts[0]?.quotaSummary).toBe("rate-limited"); + expect(firstCallAccounts[0]?.status).toBe("rate-limited"); + expect(fetchCodexQuotaSnapshotMock).not.toHaveBeenCalled(); + }); + it("treats accounts with no quota windows as the lowest ready-first floor", async () => { const now = Date.now(); loadAccountsMock.mockResolvedValue({ diff --git a/test/codex-manager-report-command.test.ts b/test/codex-manager-report-command.test.ts index 1eac66d7..4704c2e5 100644 --- a/test/codex-manager-report-command.test.ts +++ b/test/codex-manager-report-command.test.ts @@ -3,7 +3,7 @@ import { type ReportCommandDeps, runReportCommand, } from "../lib/codex-manager/commands/report.js"; -import type { AccountStorageV3 } from "../lib/storage.js"; +import type { AccountStorageV3, StorageHealthSummary } from "../lib/storage.js"; function createStorage( accounts: AccountStorageV3["accounts"] = [ @@ -50,6 +50,15 @@ function createDeps( secondary: {}, })), formatRateLimitEntry: vi.fn(() => null), + inspectStorageHealth: vi.fn(async (): Promise<StorageHealthSummary> => ({ + state: "healthy", + path: "/mock/openai-codex-accounts.json", + resetMarkerPath: "/mock/openai-codex-accounts.json.intentional-reset", + walPath: "/mock/openai-codex-accounts.json.wal", + hasResetMarker: false, + hasWal: 
false, + })), + loadRuntimeObservabilitySnapshot: vi.fn(async () => null), normalizeFailureDetail: vi.fn((message) => message ?? "unknown"), logInfo: vi.fn(), logError: vi.fn(), @@ -81,6 +90,33 @@ describe("runReportCommand", () => { expect(deps.logError).toHaveBeenCalledWith("Unknown option: --bogus"); }); + it("rejects invalid live probe budget values", async () => { + const deps = createDeps(); + + const result = await runReportCommand(["--max-probes", "0"], deps); + + expect(result).toBe(1); + expect(deps.logError).toHaveBeenCalledWith( + "--max-probes must be a positive integer", + ); + }); + + it.each([ + ["--max-accounts", "1.9", "--max-accounts must be a positive integer"], + ["--max-accounts", "1e3", "--max-accounts must be a positive integer"], + ["--max-accounts", "2foo", "--max-accounts must be a positive integer"], + ["--max-probes", "1.9", "--max-probes must be a positive integer"], + ["--max-probes", "1e3", "--max-probes must be a positive integer"], + ["--max-probes", "2foo", "--max-probes must be a positive integer"], + ] as const)("rejects malformed numeric flags %s %s", async (flag, value, message) => { + const deps = createDeps(); + + const result = await runReportCommand([flag, value], deps); + + expect(result).toBe(1); + expect(deps.logError).toHaveBeenCalledWith(message); + }); + it("writes json report output when requested", async () => { const deps = createDeps(); @@ -97,6 +133,125 @@ describe("runReportCommand", () => { expect(deps.logInfo).toHaveBeenCalledWith( expect.stringContaining('"forecast"'), ); + expect(deps.logInfo).toHaveBeenCalledWith( + expect.stringContaining('"liveProbeBudget"'), + ); + expect(deps.logInfo).toHaveBeenCalledWith( + expect.stringContaining('"storageHealth"'), + ); + }); + + it("includes runtime observability fields in json output when snapshot is available", async () => { + const deps = createDeps({ + loadRuntimeObservabilitySnapshot: vi.fn(async () => ({ + version: 1, + updatedAt: 2000, + responsesRequests: 
4, + authRefreshRequests: 2, + diagnosticProbeRequests: 1, + currentRequestId: "req_123", + poolExhaustionCooldownUntil: 9000, + serverBurstCooldownUntil: 12000, + runtimeMetrics: { + startedAt: 1000, + totalRequests: 4, + successfulRequests: 3, + failedRequests: 1, + responsesRequests: 4, + authRefreshRequests: 2, + diagnosticProbeRequests: 1, + outboundRequestAttemptBudget: 6, + outboundRequestAttemptsConsumed: 5, + requestAttemptBudgetExhaustions: 0, + poolExhaustionFastFails: 1, + serverBurstFastFails: 0, + rateLimitedResponses: 1, + serverErrors: 0, + networkErrors: 0, + userAborts: 0, + authRefreshFailures: 0, + emptyResponseRetries: 0, + accountRotations: 1, + sameAccountRetries: 0, + streamFailoverAttempts: 0, + streamFailoverCandidatesConsidered: 0, + lastStreamFailoverCandidateCount: 0, + streamFailoverRecoveries: 0, + streamFailoverCrossAccountRecoveries: 0, + cumulativeLatencyMs: 42, + lastRequestAt: 1999, + lastError: null, + }, + })), + }); + + const result = await runReportCommand(["--json"], deps); + + expect(result).toBe(0); + const jsonOutput = JSON.parse( + (deps.logInfo as ReturnType<typeof vi.fn>).mock.calls.at(-1)?.[0] ?? 
"{}", + ) as { + runtime: { + poolExhaustionCooldownUntil: number; + serverBurstCooldownUntil: number; + runtimeMetrics: Record; + }; + }; + expect(jsonOutput.runtime.poolExhaustionCooldownUntil).toBe(9000); + expect(jsonOutput.runtime.serverBurstCooldownUntil).toBe(12000); + expect(jsonOutput.runtime.runtimeMetrics).toBeDefined(); + }); + + it("respects live probe account and probe budgets", async () => { + const deps = createDeps({ + loadAccounts: vi.fn(async () => + createStorage([ + { email: "one@example.com", refreshToken: "r1", accessToken: "a1", accountId: "acct-1", expiresAt: 5_000, addedAt: 1, lastUsed: 1, enabled: true }, + { email: "two@example.com", refreshToken: "r2", accessToken: "a2", accountId: "acct-2", expiresAt: 5_000, addedAt: 2, lastUsed: 2, enabled: true }, + { email: "three@example.com", refreshToken: "r3", accessToken: "a3", accountId: "acct-3", expiresAt: 5_000, addedAt: 3, lastUsed: 3, enabled: true }, + ]), + ), + hasUsableAccessToken: vi.fn(() => true), + }); + + const result = await runReportCommand( + ["--live", "--json", "--max-accounts", "2", "--max-probes", "1"], + deps, + ); + + expect(result).toBe(0); + expect(deps.fetchCodexQuotaSnapshot).toHaveBeenCalledTimes(1); + const jsonOutput = JSON.parse( + (deps.logInfo as ReturnType).mock.calls.at(-1)?.[0] ?? 
"{}", + ) as { liveProbeBudget: { consideredAccounts: number; executedProbes: number }; forecast: { probeErrors: string[] } }; + expect(jsonOutput.liveProbeBudget).toEqual( + expect.objectContaining({ consideredAccounts: 2, executedProbes: 1 }), + ); + expect(jsonOutput.forecast.probeErrors).toEqual( + expect.arrayContaining([ + expect.stringContaining("live probe request budget reached (1)"), + ]), + ); + }); + + it("skips refreshes in cached-only live mode", async () => { + const deps = createDeps({ + hasUsableAccessToken: vi.fn(() => false), + }); + + const result = await runReportCommand(["--live", "--json", "--cached-only"], deps); + + expect(result).toBe(0); + expect(deps.queuedRefresh).not.toHaveBeenCalled(); + expect(deps.fetchCodexQuotaSnapshot).not.toHaveBeenCalled(); + const jsonOutput = JSON.parse( + (deps.logInfo as ReturnType).mock.calls.at(-1)?.[0] ?? "{}", + ) as { forecast: { probeErrors: string[] } }; + expect(jsonOutput.forecast.probeErrors).toEqual( + expect.arrayContaining([ + expect.stringContaining("skipped refresh because --cached-only is enabled"), + ]), + ); }); it("covers live probe refresh failures, missing account ids, and probe errors", async () => { @@ -191,6 +346,9 @@ describe("runReportCommand", () => { "token expired", ); expect(jsonOutput.forecast.accounts[3]?.liveQuota?.planType).toBe("pro"); + expect(jsonOutput.forecast.recommendation.selectedReason).toEqual( + "Lowest risk ready account (low, score 0).", + ); }); it("reuses usable access tokens for live probes without forcing refresh", async () => { @@ -361,6 +519,61 @@ describe("runReportCommand", () => { ); }); + it("does not mutate the in-memory report snapshot when refreshed token persistence fails", async () => { + const storage = createStorage([ + { + email: "persist-fail@example.com", + accountId: "acct-report", + accountIdSource: "org", + refreshToken: "refresh-token-1", + accessToken: "access-token-1", + expiresAt: 10, + addedAt: 1, + lastUsed: 1, + enabled: true, + }, 
+ ]); + const deps = createDeps({ + loadAccounts: vi.fn(async () => structuredClone(storage)), + saveAccounts: vi.fn(async () => { + throw Object.assign(new Error("EPERM write blocked"), { + code: "EPERM", + }); + }), + queuedRefresh: vi.fn(async () => ({ + type: "success", + access: "access-token-updated", + refresh: "refresh-token-updated", + expires: 500, + idToken: "id-token-updated", + })), + fetchCodexQuotaSnapshot: vi.fn(async () => ({ + status: 200, + model: "gpt-5-codex", + primary: {}, + secondary: {}, + })), + }); + + const result = await runReportCommand(["--live", "--json"], deps); + + expect(result).toBe(0); + expect(deps.fetchCodexQuotaSnapshot).not.toHaveBeenCalled(); + expect(deps.saveAccounts).toHaveBeenCalledTimes(4); + const jsonOutput = JSON.parse( + (deps.logInfo as ReturnType<typeof vi.fn>).mock.calls.at(-1)?.[0] ?? "{}", + ) as { + forecast: { probeErrors: string[]; accounts: Array<{ label: string }> }; + }; + expect(jsonOutput.forecast.probeErrors).toEqual( + expect.arrayContaining([ + expect.stringContaining("EPERM write blocked"), + ]), + ); + expect(storage.accounts[0]?.refreshToken).toBe("refresh-token-1"); + expect(storage.accounts[0]?.accessToken).toBe("access-token-1"); + }); + it("prints a human-readable report and announces the output path", async () => { const deps = createDeps(); diff --git a/test/codex-manager-status-command.test.ts b/test/codex-manager-status-command.test.ts index dc189228..2bd1f625 100644 --- a/test/codex-manager-status-command.test.ts +++ b/test/codex-manager-status-command.test.ts @@ -5,7 +5,7 @@ import { runStatusCommand, type StatusCommandDeps, } from "../lib/codex-manager/commands/status.js"; -import type { AccountStorageV3 } from "../lib/storage.js"; +import type { AccountStorageV3, StorageHealthSummary } from "../lib/storage.js"; function createStorage(): AccountStorageV3 { return { @@ -39,6 +39,14 @@ function createStatusDeps( loadAccounts: vi.fn(async () => createStorage()), resolveActiveIndex: vi.fn(() => 0), 
formatRateLimitEntry: vi.fn(() => null), + inspectStorageHealth: vi.fn(async (): Promise<StorageHealthSummary> => ({ + state: "healthy", + path: "/tmp/codex.json", + resetMarkerPath: "/tmp/codex.json.intentional-reset", + walPath: "/tmp/codex.json.wal", + hasResetMarker: false, + hasWal: false, + })), getNow: vi.fn(() => 2_000), logInfo: vi.fn(), ...overrides, @@ -55,6 +63,29 @@ describe("runStatusCommand", () => { expect(deps.getStoragePath).toHaveBeenCalledTimes(1); expect(deps.logInfo).toHaveBeenCalledWith("No accounts configured."); expect(deps.logInfo).toHaveBeenCalledWith("Storage: /tmp/codex.json"); + expect(deps.logInfo).toHaveBeenCalledWith("Storage health: healthy"); + }); + + it("prints explicit corrupt storage state for empty result cases", async () => { + const deps = createStatusDeps({ + loadAccounts: vi.fn(async () => null), + inspectStorageHealth: vi.fn(async () => ({ + state: "corrupt", + path: "/tmp/codex.json", + resetMarkerPath: "/tmp/codex.json.intentional-reset", + walPath: "/tmp/codex.json.wal", + hasResetMarker: false, + hasWal: false, + details: "Unexpected token", + })), + }); + + await runStatusCommand(deps); + + expect(deps.logInfo).toHaveBeenCalledWith( + "No accounts configured. Storage appears corrupted.", + ); + expect(deps.logInfo).toHaveBeenCalledWith("Storage health: corrupt"); + }); it("prints account rows with current and disabled markers", async () => { @@ -68,11 +99,17 @@ describe("runStatusCommand", () => { expect(deps.getStoragePath).toHaveBeenCalledTimes(1); expect(deps.logInfo).toHaveBeenCalledWith("Accounts (2)"); expect(deps.logInfo).toHaveBeenCalledWith("Storage: /tmp/codex.json"); + expect(deps.logInfo).toHaveBeenCalledWith( + expect.stringContaining("Selection reason: account 1"), + ); expect(deps.logInfo).toHaveBeenCalledWith( expect.stringContaining( "1. 
Account 1 (one@example.com) [current, rate-limited]", ), ); + expect(deps.logInfo).toHaveBeenCalledWith( + expect.stringContaining("reason:"), + ); expect(deps.logInfo).toHaveBeenCalledWith( expect.stringContaining( "2. Account 2 (two@example.com) [disabled, rate-limited]", diff --git a/test/config-explain.test.ts b/test/config-explain.test.ts index fc01b284..4214764f 100644 --- a/test/config-explain.test.ts +++ b/test/config-explain.test.ts @@ -39,6 +39,10 @@ describe("getPluginConfigExplainReport", () => { delete process.env.CODEX_MODE; delete process.env.CODEX_AUTH_FAST_SESSION_STRATEGY; delete process.env.CODEX_MULTI_AUTH_CONFIG_PATH; + delete process.env.CODEX_AUTH_RATE_LIMIT_DEDUP_WINDOW_MS; + delete process.env.CODEX_AUTH_RATE_LIMIT_STATE_RESET_MS; + delete process.env.CODEX_AUTH_RATE_LIMIT_MAX_BACKOFF_MS; + delete process.env.CODEX_AUTH_RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS; for (const configPath of tempConfigPaths) { await removeWithRetry(configPath, { force: true }).catch(() => {}); } @@ -128,12 +132,12 @@ describe("getPluginConfigExplainReport", () => { expect(fallback?.source).not.toBe("file"); }); - it("normalizes non-finite values for json-safe output", async () => { + it("reports retry-all max retries honestly for json-safe output", async () => { const { getPluginConfigExplainReport } = await import("../lib/config.js"); const report = getPluginConfigExplainReport(); const entry = expectEntry(report, "retryAllAccountsMaxRetries"); - expect(entry?.value).toBe("Infinity"); - expect(entry?.defaultValue).toBe("Infinity"); + expect(entry?.value).toBe(0); + expect(entry?.defaultValue).toBe(0); const serialized = JSON.parse(JSON.stringify(report)) as { entries: Array<{ key: string; value: unknown; defaultValue: unknown }>; }; @@ -141,11 +145,47 @@ describe("getPluginConfigExplainReport", () => { (item) => item.key === "retryAllAccountsMaxRetries", ); expect(serializedEntry).toMatchObject({ - value: "Infinity", - defaultValue: "Infinity", + value: 0, + 
defaultValue: 0, }); }); + it("covers the rate-limit explain rows and env sources", async () => { + const { getPluginConfigExplainReport } = await import("../lib/config.js"); + const keys = [ + "rateLimitDedupWindowMs", + "rateLimitStateResetMs", + "rateLimitMaxBackoffMs", + "rateLimitShortRetryThresholdMs", + ] as const; + const report = getPluginConfigExplainReport(); + for (const key of keys) { + expect(expectEntry(report, key)).toBeDefined(); + } + + const serialized = JSON.parse(JSON.stringify(report)) as { + entries: Array<{ key: string; value: unknown; defaultValue: unknown; source: string }>; + }; + for (const key of keys) { + const entry = serialized.entries.find((item) => item.key === key); + expect(entry).toBeDefined(); + expect(entry?.value).toBeDefined(); + expect(entry?.defaultValue).toBeDefined(); + } + + vi.resetModules(); + process.env.CODEX_AUTH_RATE_LIMIT_DEDUP_WINDOW_MS = "3210"; + process.env.CODEX_AUTH_RATE_LIMIT_STATE_RESET_MS = "654321"; + process.env.CODEX_AUTH_RATE_LIMIT_MAX_BACKOFF_MS = "12345"; + process.env.CODEX_AUTH_RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS = "250"; + const envMod = await import("../lib/config.js"); + const envReport = envMod.getPluginConfigExplainReport(); + for (const key of keys) { + const entry = expectEntry(envReport, key); + expect(entry?.source).toBe("env"); + } + }); + it("reports default and env sources", async () => { const mod = await import("../lib/config.js"); let report = mod.getPluginConfigExplainReport(); diff --git a/test/failure-policy.test.ts b/test/failure-policy.test.ts index bdcb404f..97d17d83 100644 --- a/test/failure-policy.test.ts +++ b/test/failure-policy.test.ts @@ -65,6 +65,7 @@ describe("failure policy", () => { expect(decision.retryDelayMs).toBe(500); expect(decision.rotateAccount).toBe(false); expect(decision.handoffStrategy).toBe("hard"); + expect(decision.cooldownReason).toBe("server-error"); }); it("marks rate limit without cooldown mutation", () => { @@ -146,6 +147,7 @@ describe("failure 
policy", () => { expect(decision.retrySameAccount).toBe(false); expect(decision.rotateAccount).toBe(true); expect(decision.cooldownMs).toBe(3_000); + expect(decision.cooldownReason).toBe("server-error"); }); it("uses override cooldowns for network and server kinds", () => { diff --git a/test/fetch-helpers.test.ts b/test/fetch-helpers.test.ts index 3b5f9a28..9362ce6e 100644 --- a/test/fetch-helpers.test.ts +++ b/test/fetch-helpers.test.ts @@ -464,34 +464,97 @@ describe('Fetch Helpers Module', () => { expect(headers.get('accept')).toBe('text/event-stream'); }); - it('maps usage-limit 404 errors to 429', async () => { - const body = { - error: { - code: 'usage_limit_reached', - message: 'limit reached', - }, - }; - const resp = new Response(JSON.stringify(body), { status: 404 }); - const { response: mapped, rateLimit } = await handleErrorResponse(resp); - expect(mapped.status).toBe(429); - const json = await mapped.json() as any; - expect(json.error.code).toBe('usage_limit_reached'); - expect(rateLimit?.retryAfterMs).toBeGreaterThan(0); - }); - - it('leaves non-usage 404 errors unchanged', async () => { - const body = { error: { code: 'not_found', message: 'nope' } }; - const resp = new Response(JSON.stringify(body), { status: 404 }); - const { response: result, rateLimit } = await handleErrorResponse(resp); - expect(result.status).toBe(404); - const json = await result.json() as any; - expect(json.error.code).toBe('not_found'); - expect(rateLimit).toBeUndefined(); - }); + it('maps usage-limit 404 errors to 429', async () => { + const body = { + error: { + code: 'usage_limit_reached', + message: 'limit reached', + }, + }; + const resp = new Response(JSON.stringify(body), { status: 404 }); + const { response: mapped, rateLimit } = await handleErrorResponse(resp); + expect(mapped.status).toBe(429); + const json = await mapped.json() as any; + expect(json.error.code).toBe('usage_limit_reached'); + expect(rateLimit?.retryAfterMs).toBeGreaterThan(0); + }); + + it('maps 
usage-limit 404 errors to 429 when the signal comes from error.type', async () => { + const body = { + error: { + type: 'usage_limit_reached', + message: 'limit reached', + }, + }; + const resp = new Response(JSON.stringify(body), { status: 404 }); + const { response: mapped, rateLimit } = await handleErrorResponse(resp); + expect(mapped.status).toBe(429); + const json = await mapped.json() as { error: { type?: string } }; + expect(json.error.type).toBe('usage_limit_reached'); + expect(rateLimit?.retryAfterMs).toBeGreaterThan(0); + }); + + it('maps usage-limit 404 errors to 429 when code and type disagree', async () => { + const body = { + error: { + code: 'rate_limit_exceeded', + type: 'usage_limit_reached', + message: 'limit reached', + }, + }; + const resp = new Response(JSON.stringify(body), { status: 404 }); + const { response: mapped, rateLimit } = await handleErrorResponse(resp); + expect(mapped.status).toBe(429); + expect(rateLimit?.retryAfterMs).toBeGreaterThan(0); + }); + + it('maps entitlement-style 404 errors to 403 when only error.type carries the signal', async () => { + const body = { + error: { + code: 'not_found', + type: 'usage_not_included', + message: 'Not included in your plan', + }, + }; + const resp = new Response(JSON.stringify(body), { status: 404 }); + const { response: mapped, rateLimit } = await handleErrorResponse(resp); + expect(mapped.status).toBe(403); + const json = await mapped.json() as { error: { code?: string; message?: string } }; + expect(json.error.code).toBe('usage_not_included'); + expect(json.error.message).toContain('not included'); + expect(rateLimit).toBeUndefined(); + }); + + it('prioritizes entitlement over rate-limit when signals conflict', async () => { + const body = { + error: { + code: 'usage_not_included', + type: 'usage_limit_reached', + message: 'mixed signals', + }, + }; + const resp = new Response(JSON.stringify(body), { status: 404 }); + const { response: mapped, rateLimit } = await 
handleErrorResponse(resp); + expect(mapped.status).toBe(403); + expect(rateLimit).toBeUndefined(); + }); + + it('leaves non-usage 404 errors unchanged', async () => { + const body = { error: { code: 'not_found', message: 'nope' } }; + const resp = new Response(JSON.stringify(body), { status: 404 }); + const { response: result, rateLimit } = await handleErrorResponse(resp); + expect(result.status).toBe(404); + const json = await result.json() as any; + expect(json.error.code).toBe('not_found'); + expect(rateLimit).toBeUndefined(); + }); it('should remove x-api-key header', () => { - const init = { headers: { 'x-api-key': 'should-be-removed' } } as any; - const headers = createCodexHeaders(init, accountId, accessToken, { model: 'gpt-5', promptCacheKey: 'session-2' }); + const init = { headers: { 'x-api-key': 'should-be-removed' } } as any; + const headers = createCodexHeaders(init, accountId, accessToken, { + model: 'gpt-5', + promptCacheKey: 'session-2', + }); expect(headers.has('x-api-key')).toBe(false); }); @@ -981,15 +1044,41 @@ describe('createEntitlementErrorResponse', () => { }); describe('handleErrorResponse edge cases', () => { - it('handles 404 with non-JSON body containing usage limit text', async () => { + it('does not remap 404s with free-text usage-limit messages', async () => { const response = new Response('usage limit exceeded - please try again', { status: 404 }); const { response: result, rateLimit } = await handleErrorResponse(response); + expect(result.status).toBe(404); + expect(rateLimit).toBeUndefined(); + }); + + it('remaps structured 404s with rate_limit_exceeded codes', async () => { + const response = new Response( + JSON.stringify({ + error: { + code: 'rate_limit_exceeded', + message: 'upstream overloaded', + }, + }), + { status: 404 }, + ); + + const { response: result, rateLimit } = await handleErrorResponse(response); + expect(result.status).toBe(429); expect(rateLimit?.retryAfterMs).toBeGreaterThan(0); }); + it('does not remap 404s 
with malformed json bodies', async () => { + const response = new Response('{ invalid json', { status: 404 }); + + const { response: result, rateLimit } = await handleErrorResponse(response); + + expect(result.status).toBe(404); + expect(rateLimit).toBeUndefined(); + }); + it('does not treat non-429 rate-limit text as a cooldown signal', async () => { const response = new Response('rate_limit_exceeded - upstream overloaded', { status: 500, diff --git a/test/index-retry.test.ts b/test/index-retry.test.ts index 12d43a75..81b91005 100644 --- a/test/index-retry.test.ts +++ b/test/index-retry.test.ts @@ -441,6 +441,296 @@ describe("OpenAIAuthPlugin rate-limit retry", () => { expect(response.status).toBe(200); }); + it("does not replay across all accounts by default when every account is rate-limited", async () => { + delete process.env.CODEX_AUTH_RETRY_ALL_RATE_LIMITED; + delete process.env.CODEX_AUTH_RETRY_ALL_MAX_WAIT_MS; + delete process.env.CODEX_AUTH_RETRY_ALL_MAX_RETRIES; + vi.resetModules(); + const secondaryAccount = createMockAccount({ + index: 1, + accountId: "account-2", + email: "user2@example.com", + refreshToken: "refresh-token-account-2", + access: "access-token-account-2", + }); + accountManagerState.accounts = [createMockAccount(), secondaryAccount]; + accountManagerState.accountSelections = [null]; + + const { OpenAIAuthPlugin } = await import("../index.js"); + const client = { + tui: { showToast: vi.fn() }, + auth: { set: vi.fn() }, + } as any; + const plugin = await OpenAIAuthPlugin({ client }); + + const getAuth = async () => ({ + type: "oauth" as const, + access: "a", + refresh: "r", + expires: Date.now() + 60_000, + multiAccount: true, + }); + + const sdk = (await plugin.auth.loader(getAuth, { options: {}, models: {} })) as any; + const response = await sdk.fetch("https://example.com", {}); + const payload = await response.json(); + + expect(globalThis.fetch).not.toHaveBeenCalled(); + expect(response.status).toBe(429); + 
expect(payload.error.message).toContain("All 2 account(s) are rate-limited."); + expect(payload.error.message).toContain("15000ms"); + }); + + it("fast-fails after repeated cross-account 5xx errors arm the server-burst cooldown", async () => { + const accounts = Array.from({ length: 4 }, (_, index) => + createMockAccount({ + index, + accountId: `account-${index + 1}`, + email: `user${index + 1}@example.com`, + refreshToken: `refresh-token-${index + 1}`, + access: `access-token-account-${index + 1}`, + }), + ); + accountManagerState.accounts = accounts; + accountManagerState.accountSelections = [...accounts]; + + const fetchMock = vi.fn().mockImplementation(() => + Promise.resolve( + new Response( + JSON.stringify({ + error: { code: "server_error", message: "temporary outage" }, + }), + { + status: 503, + headers: { "content-type": "application/json" }, + }, + ), + ), + ); + globalThis.fetch = fetchMock as any; + + const { OpenAIAuthPlugin } = await import("../index.js"); + const client = { + tui: { showToast: vi.fn() }, + auth: { set: vi.fn() }, + } as any; + const plugin = await OpenAIAuthPlugin({ client }); + + const getAuth = async () => ({ + type: "oauth" as const, + access: "a", + refresh: "r", + expires: Date.now() + 60_000, + multiAccount: true, + }); + + const sdk = (await plugin.auth.loader(getAuth, { options: {}, models: {} })) as any; + const { clearPoolExhaustionCooldown } = await import( + "../lib/request/request-resilience.js" + ); + + for (let index = 0; index < 3; index += 1) { + accountManagerState.accounts = [accounts[index] as Record]; + accountManagerState.accountSelections = [accounts[index] as Record]; + const fetchPromise = sdk.fetch("https://example.com", {}); + await vi.advanceTimersByTimeAsync(2_000); + const response = await fetchPromise; + expect(response.status).toBe(429); + clearPoolExhaustionCooldown(); + } + + expect(fetchMock).toHaveBeenCalledTimes(3); + + accountManagerState.accounts = [accounts[3] as Record]; + 
accountManagerState.accountSelections = [accounts[3] as Record]; + const secondResponse = await sdk.fetch("https://example.com", {}); + const secondPayload = await secondResponse.json(); + expect(secondResponse.status).toBe(503); + expect(secondPayload.error.message).toContain( + "Multiple accounts recently failed with upstream server errors.", + ); + expect(fetchMock).toHaveBeenCalledTimes(3); + }); + + it("stops after the bounded outbound request budget even when more accounts are available", async () => { + const logger = await import("../lib/logger.js"); + const logDebugSpy = vi.spyOn(logger, "logDebug").mockImplementation(() => {}); + const logWarnSpy = vi.spyOn(logger, "logWarn").mockImplementation(() => {}); + + const accounts = Array.from({ length: 8 }, (_, index) => + createMockAccount({ + index, + accountId: `account-${index + 1}`, + email: `user${index + 1}@example.com`, + refreshToken: `refresh-token-${index + 1}`, + access: `access-token-account-${index + 1}`, + }), + ); + accountManagerState.accounts = accounts; + accountManagerState.accountSelections = [...accounts]; + + const fetchMock = vi.fn().mockImplementation(() => + Promise.resolve( + new Response( + JSON.stringify({ + error: { code: "server_error", message: "temporary outage" }, + }), + { + status: 500, + headers: { "content-type": "application/json" }, + }, + ), + ), + ); + globalThis.fetch = fetchMock as any; + + const { OpenAIAuthPlugin } = await import("../index.js"); + const client = { + tui: { showToast: vi.fn() }, + auth: { set: vi.fn() }, + } as any; + const plugin = await OpenAIAuthPlugin({ client }); + + const getAuth = async () => ({ + type: "oauth" as const, + access: "a", + refresh: "r", + expires: Date.now() + 60_000, + multiAccount: true, + }); + + const sdk = (await plugin.auth.loader(getAuth, { options: {}, models: {} })) as any; + const response = await sdk.fetch("https://example.com", {}); + const payload = await response.json(); + + 
expect(fetchMock).toHaveBeenCalledTimes(6); + expect(response.status).toBe(503); + expect(payload).toEqual({ + error: { + message: + "Request attempt budget exhausted after 6 outbound request(s). Try again after the current retries settle.", + }, + }); + expect(logDebugSpy).toHaveBeenCalledWith( + "Configured outbound request attempt budget.", + expect.objectContaining({ + budget: 6, + accountCount: 8, + maxSameAccountRetries: 1, + emptyResponseMaxRetries: 2, + streamFailoverMax: 1, + }), + ); + expect(logWarnSpy).toHaveBeenCalledWith( + "Request attempt budget exhausted.", + expect.objectContaining({ + reason: "primary", + accountIndex: 6, + budget: 6, + consumed: 6, + }), + ); + }); + + it("keeps the total request cap when empty-response retries and server-error rotation combine", async () => { + const logger = await import("../lib/logger.js"); + const logWarnSpy = vi.spyOn(logger, "logWarn").mockImplementation(() => {}); + + const accounts = Array.from({ length: 6 }, (_, index) => + createMockAccount({ + index, + accountId: `account-${index + 1}`, + email: `user${index + 1}@example.com`, + refreshToken: `refresh-token-${index + 1}`, + access: `access-token-account-${index + 1}`, + }), + ); + accountManagerState.accounts = accounts; + accountManagerState.accountSelections = [...accounts]; + + const serverErrorResponse = () => + new Response( + JSON.stringify({ + error: { code: "server_error", message: "temporary outage" }, + }), + { + status: 500, + headers: { "content-type": "application/json" }, + }, + ); + + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + new Response("{}", { + status: 200, + headers: { "content-type": "application/json" }, + }), + ) + .mockResolvedValueOnce(serverErrorResponse()) + .mockResolvedValueOnce(serverErrorResponse()) + .mockResolvedValueOnce(serverErrorResponse()) + .mockResolvedValueOnce(serverErrorResponse()) + .mockResolvedValueOnce(serverErrorResponse()); + globalThis.fetch = fetchMock as any; + + const { 
OpenAIAuthPlugin } = await import("../index.js"); + const client = { + tui: { showToast: vi.fn() }, + auth: { set: vi.fn() }, + } as any; + const plugin = await OpenAIAuthPlugin({ client }); + + const getAuth = async () => ({ + type: "oauth" as const, + access: "a", + refresh: "r", + expires: Date.now() + 60_000, + multiAccount: true, + }); + + const sdk = (await plugin.auth.loader(getAuth, { options: {}, models: {} })) as any; + const fetchPromise = sdk.fetch("https://example.com", {}); + + await vi.advanceTimersByTimeAsync(1500); + + const response = await fetchPromise; + const payload = await response.json(); + + expect(fetchMock).toHaveBeenCalledTimes(6); + expect( + fetchMock.mock.calls.map( + (call) => (call[1]?.headers as Headers).get("x-account-id"), + ), + ).toEqual([ + "account-1", + "account-1", + "account-2", + "account-3", + "account-4", + "account-5", + ]); + expect(response.status).toBe(503); + expect(payload).toEqual({ + error: { + message: + "Request attempt budget exhausted after 6 outbound request(s). Try again after the current retries settle.", + }, + }); + expect(logWarnSpy).toHaveBeenCalledWith( + "Empty response received (attempt 1/2). 
Retrying...", + ); + expect(logWarnSpy).toHaveBeenCalledWith( + "Request attempt budget exhausted.", + expect.objectContaining({ + reason: "primary", + accountIndex: 5, + budget: 6, + consumed: 6, + }), + ); + }); + it("rebuilds request headers after rotating to the next workspace", async () => { const account = createMockAccount({ workspaces: [ diff --git a/test/index.test.ts b/test/index.test.ts index b5aed127..6829d1a7 100644 --- a/test/index.test.ts +++ b/test/index.test.ts @@ -6,6 +6,26 @@ const { showRuntimeToastMock } = vi.hoisted(() => ({ showRuntimeToastMock: vi.fn(), })); +const { + configureRateLimitBackoffMock, + getRateLimitBackoffMock, + getRateLimitShortRetryThresholdMock, + resetRateLimitBackoffMock, + getRateLimitDedupWindowMsMock, + getRateLimitStateResetMsMock, + getRateLimitMaxBackoffMsMock, + getRateLimitShortRetryThresholdConfigMock, +} = vi.hoisted(() => ({ + configureRateLimitBackoffMock: vi.fn(), + getRateLimitBackoffMock: vi.fn(() => ({ attempt: 1, delayMs: 1000 })), + getRateLimitShortRetryThresholdMock: vi.fn(() => 5000), + resetRateLimitBackoffMock: vi.fn(), + getRateLimitDedupWindowMsMock: vi.fn(() => 2000), + getRateLimitStateResetMsMock: vi.fn(() => 120000), + getRateLimitMaxBackoffMsMock: vi.fn(() => 60000), + getRateLimitShortRetryThresholdConfigMock: vi.fn(() => 5000), +})); + vi.mock("@codex-ai/plugin/tool", () => { const makeSchema = () => ({ optional: () => makeSchema(), @@ -80,6 +100,10 @@ vi.mock("../lib/config.js", () => ({ getFastSessionStrategy: () => "hybrid", getFastSessionMaxInputItems: () => 30, getRateLimitToastDebounceMs: () => 5000, + getRateLimitDedupWindowMs: getRateLimitDedupWindowMsMock, + getRateLimitStateResetMs: getRateLimitStateResetMsMock, + getRateLimitMaxBackoffMs: getRateLimitMaxBackoffMsMock, + getRateLimitShortRetryThresholdMs: getRateLimitShortRetryThresholdConfigMock, getRetryAllAccountsMaxRetries: () => 3, getRetryAllAccountsMaxWaitMs: () => 30000, getRetryAllAccountsRateLimited: () => true, @@ 
-204,11 +228,30 @@ vi.mock("../lib/recovery.js", () => ({ })); vi.mock("../lib/request/rate-limit-backoff.js", () => ({ - getRateLimitBackoff: vi.fn(() => ({ attempt: 1, delayMs: 1000 })), - RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS: 5000, - resetRateLimitBackoff: vi.fn(), + configureRateLimitBackoff: configureRateLimitBackoffMock, + getRateLimitBackoff: getRateLimitBackoffMock, + getRateLimitShortRetryThresholdMs: getRateLimitShortRetryThresholdMock, + MAX_SHORT_RETRY_ATTEMPTS: 3, + resetRateLimitBackoff: resetRateLimitBackoffMock, })); +beforeEach(() => { + configureRateLimitBackoffMock.mockReset(); + getRateLimitBackoffMock.mockReset(); + getRateLimitBackoffMock.mockImplementation(() => ({ attempt: 1, delayMs: 1000 })); + getRateLimitShortRetryThresholdMock.mockReset(); + getRateLimitShortRetryThresholdMock.mockImplementation(() => 5000); + resetRateLimitBackoffMock.mockReset(); + getRateLimitDedupWindowMsMock.mockReset(); + getRateLimitDedupWindowMsMock.mockImplementation(() => 2000); + getRateLimitStateResetMsMock.mockReset(); + getRateLimitStateResetMsMock.mockImplementation(() => 120000); + getRateLimitMaxBackoffMsMock.mockReset(); + getRateLimitMaxBackoffMsMock.mockImplementation(() => 60000); + getRateLimitShortRetryThresholdConfigMock.mockReset(); + getRateLimitShortRetryThresholdConfigMock.mockImplementation(() => 5000); +}); + vi.mock("../lib/runtime/toast.js", async () => { const actual = await vi.importActual( "../lib/runtime/toast.js", @@ -1642,7 +1685,7 @@ describe("OpenAIOAuthPlugin fetch handler", () => { expect(secondBody?.previous_response_id).toBe("resp_standalone_789"); }); - it("keeps account and previous_response_id aligned across overlapping same-session streams", async () => { + it("keeps the newest account and previous_response_id across overlapping same-session streams", async () => { const { AccountManager } = await import("../lib/accounts.js"); const configModule = await import("../lib/config.js"); const fetchHelpers = await 
import("../lib/request/fetch-helpers.js"); @@ -1748,9 +1791,9 @@ describe("OpenAIOAuthPlugin fetch handler", () => { previous_response_id?: string; }; const thirdHeaders = new Headers(thirdInit.headers); - expect(thirdBody.previous_response_id).toBe("resp_first_123"); - expect(thirdHeaders.get("x-test-account-id")).toBe("acc-1"); - expect(thirdHeaders.get("x-test-access-token")).toBe("access-alpha"); + expect(thirdBody.previous_response_id).toBe("resp_second_456"); + expect(thirdHeaders.get("x-test-account-id")).toBe("acc-2"); + expect(thirdHeaders.get("x-test-access-token")).toBe("access-beta"); }); it("compacts fast-session input before sending the upstream request when compaction succeeds", async () => { const fetchHelpers = await import("../lib/request/fetch-helpers.js"); @@ -2655,6 +2698,94 @@ describe("OpenAIOAuthPlugin fetch handler", () => { dateNowSpy.mockRestore(); }); + it("notifies preemptive quota scheduler on 503 overload responses", async () => { + const { PreemptiveQuotaScheduler } = await import( + "../lib/preemptive-quota-scheduler.js" + ); + const schedulerSpy = vi.spyOn( + PreemptiveQuotaScheduler.prototype, + "markRateLimited", + ); + globalThis.fetch = vi.fn().mockResolvedValue( + new Response("service unavailable", { status: 503 }), + ); + + const { sdk } = await setupPlugin(); + await sdk.fetch!("https://api.openai.com/v1/chat", { + method: "POST", + body: JSON.stringify({ model: "gpt-5.1" }), + }); + + expect(schedulerSpy).toHaveBeenCalled(); + schedulerSpy.mockRestore(); + }); + + it("notifies preemptive quota scheduler on 502 overload responses", async () => { + const { PreemptiveQuotaScheduler } = await import( + "../lib/preemptive-quota-scheduler.js" + ); + const schedulerSpy = vi.spyOn( + PreemptiveQuotaScheduler.prototype, + "markRateLimited", + ); + globalThis.fetch = vi.fn().mockResolvedValue( + new Response("bad gateway", { status: 502 }), + ); + + const { sdk } = await setupPlugin(); + await 
sdk.fetch!("https://api.openai.com/v1/chat", { + method: "POST", + body: JSON.stringify({ model: "gpt-5.1" }), + }); + + expect(schedulerSpy).toHaveBeenCalled(); + schedulerSpy.mockRestore(); + }); + + it("notifies preemptive quota scheduler on 529 overload responses", async () => { + const { PreemptiveQuotaScheduler } = await import( + "../lib/preemptive-quota-scheduler.js" + ); + const schedulerSpy = vi.spyOn( + PreemptiveQuotaScheduler.prototype, + "markRateLimited", + ); + globalThis.fetch = vi.fn().mockResolvedValue( + new Response("overloaded", { status: 529 }), + ); + + const { sdk } = await setupPlugin(); + await sdk.fetch!("https://api.openai.com/v1/chat", { + method: "POST", + body: JSON.stringify({ model: "gpt-5.1" }), + }); + + expect(schedulerSpy).toHaveBeenCalled(); + schedulerSpy.mockRestore(); + }); + + it("does not notify preemptive quota scheduler on generic 500 server errors", async () => { + const { PreemptiveQuotaScheduler } = await import( + "../lib/preemptive-quota-scheduler.js" + ); + const schedulerSpy = vi.spyOn( + PreemptiveQuotaScheduler.prototype, + "markRateLimited", + ); + globalThis.fetch = vi.fn().mockResolvedValue( + new Response("internal server error", { status: 500 }), + ); + + const { sdk } = await setupPlugin(); + await sdk.fetch!("https://api.openai.com/v1/chat", { + method: "POST", + body: JSON.stringify({ model: "gpt-5.1" }), + }); + + expect(schedulerSpy).not.toHaveBeenCalled(); + schedulerSpy.mockRestore(); + }); + it("falls back from gpt-5.3-codex to gpt-5.2-codex when unsupported fallback is enabled", async () => { const configModule = await import("../lib/config.js"); const fetchHelpers = await import("../lib/request/fetch-helpers.js"); @@ -4080,10 +4211,31 @@ describe("OpenAIOAuthPlugin runtime toast forwarding", () => { ); }); - it("forwards short retry rate-limit toast arguments through showRuntimeToast", async () => { + it("configures rate-limit backoff with non-default config getter values during loader", async () 
=> { + getRateLimitDedupWindowMsMock.mockImplementation(() => 3_210); + getRateLimitStateResetMsMock.mockImplementation(() => 654_321); + getRateLimitMaxBackoffMsMock.mockImplementation(() => 12_345); + getRateLimitShortRetryThresholdConfigMock.mockImplementation(() => 250); + + const mockClient = createMockClient(); + const { OpenAIOAuthPlugin } = await import("../index.js"); + const plugin = await OpenAIOAuthPlugin({ client: mockClient } as never) as unknown as PluginType; + + await plugin.auth.loader(getOAuthAuth, { options: {}, models: {} }); + + expect(configureRateLimitBackoffMock).toHaveBeenCalledWith({ + dedupWindowMs: 3_210, + stateResetMs: 654_321, + maxBackoffMs: 12_345, + shortRetryThresholdMs: 250, + }); + }); + + it("forwards short retry rate-limit toast arguments through showRuntimeToast when configured threshold allows retry", async () => { const { AccountManager } = await import("../lib/accounts.js"); const fetchHelpersModule = await import("../lib/request/fetch-helpers.js"); const rateLimitBackoffModule = await import("../lib/request/rate-limit-backoff.js"); + getRateLimitShortRetryThresholdMock.mockReturnValue(1_500); const markToastShown = vi.fn(); const manager = { @@ -4174,10 +4326,11 @@ describe("OpenAIOAuthPlugin runtime toast forwarding", () => { expect(markToastShown).toHaveBeenCalledWith(0); }); - it("does not short-retry the same account when the parsed cooldown exceeds the retry floor", async () => { + it("rotates when the configured short retry threshold is lower than the cooldown", async () => { const { AccountManager } = await import("../lib/accounts.js"); const fetchHelpersModule = await import("../lib/request/fetch-helpers.js"); const rateLimitBackoffModule = await import("../lib/request/rate-limit-backoff.js"); + getRateLimitShortRetryThresholdMock.mockReturnValue(500); const markRateLimitedWithReason = vi.fn(); const manager = { @@ -4260,10 +4413,9 @@ describe("OpenAIOAuthPlugin runtime toast forwarding", () => { 
expect(showRuntimeToastMock).not.toHaveBeenCalled(); }); - it("applies the default cooldown when a 429 has no parsed retry metadata", async () => { + it("skips rate-limit marking when a 429 has no parsed retry metadata (entitlement-style)", async () => { const { AccountManager } = await import("../lib/accounts.js"); const fetchHelpersModule = await import("../lib/request/fetch-helpers.js"); - const rateLimitBackoffModule = await import("../lib/request/rate-limit-backoff.js"); const markRateLimitedWithReason = vi.fn(); const manager = { @@ -4318,13 +4470,95 @@ describe("OpenAIOAuthPlugin runtime toast forwarding", () => { rateLimit: undefined, errorBody: "rate limited", } as never); + globalThis.fetch = vi + .fn() + .mockResolvedValueOnce(new Response("rate limited", { status: 429 })); + + const mockClient = createMockClient(); + const { OpenAIOAuthPlugin } = await import("../index.js"); + const plugin = await OpenAIOAuthPlugin({ client: mockClient } as never) as unknown as PluginType; + const sdk = await plugin.auth.loader(getOAuthAuth, { options: {}, models: {} }); + const response = await sdk.fetch!("https://api.openai.com/v1/chat/completions", { + method: "POST", + body: JSON.stringify({ model: "gpt-5.1" }), + }); + + expect(response.status).toBe(429); + expect(globalThis.fetch).toHaveBeenCalledTimes(1); + expect(markRateLimitedWithReason).not.toHaveBeenCalled(); + }); + + it("rotates account when upstream 404 usage_limit_reached is remapped to 429", async () => { + const { AccountManager } = await import("../lib/accounts.js"); + const fetchHelpersModule = await import("../lib/request/fetch-helpers.js"); + const rateLimitBackoffModule = await import("../lib/request/rate-limit-backoff.js"); + + const markRateLimitedWithReason = vi.fn(); + const manager = { + getAccountCount: () => 1, + getCurrentOrNextForFamilyHybrid: () => ({ + index: 0, + accountId: "acc-1", + email: "alpha@example.com", + refreshToken: "refresh-1", + }), + getCurrentOrNextForFamily: () => ({ + 
index: 0, + accountId: "acc-1", + email: "alpha@example.com", + refreshToken: "refresh-1", + }), + getCurrentWorkspace: () => null, + getAccountByIndex: () => null, + getAccountsSnapshot: () => [], + isAccountAvailableForFamily: () => true, + toAuthDetails: () => ({ + type: "oauth" as const, + access: "access-token", + refresh: "refresh-1", + expires: Date.now() + 60_000, + }), + hasRefreshToken: () => true, + saveToDiskDebounced: () => {}, + updateFromAuth: () => {}, + clearAuthFailures: () => {}, + incrementAuthFailures: () => 1, + saveToDisk: async () => {}, + markAccountCoolingDown: () => {}, + markRateLimited: () => {}, + markRateLimitedWithReason, + consumeToken: () => true, + refundToken: () => {}, + syncCodexCliActiveSelectionForIndex: async () => {}, + markSwitched: () => {}, + removeAccount: () => {}, + recordFailure: () => {}, + recordSuccess: () => {}, + recordRateLimit: () => {}, + getMinWaitTimeForFamily: () => 0, + shouldShowAccountToast: () => true, + markToastShown: () => {}, + setActiveIndex: () => null, + }; + vi.spyOn(AccountManager, "loadFromDisk").mockResolvedValue(manager as never); + vi.mocked(fetchHelpersModule.handleErrorResponse).mockResolvedValueOnce({ + response: new Response(JSON.stringify({ error: { code: "usage_limit_reached", message: "Usage limit reached" } }), { status: 429 }), + rateLimit: { + retryAfterMs: 2 * 60 * 60 * 1000, + code: "usage_limit_reached", + }, + errorBody: JSON.stringify({ error: { code: "usage_limit_reached", message: "Usage limit reached" } }), + } as never); vi.mocked(rateLimitBackoffModule.getRateLimitBackoff).mockReturnValueOnce({ - attempt: 1, - delayMs: 5_000, + attempt: 2, + delayMs: 1000, }); globalThis.fetch = vi .fn() - .mockResolvedValueOnce(new Response("rate limited", { status: 429 })); + .mockResolvedValueOnce( + new Response(JSON.stringify({ error: { code: "usage_limit_reached", message: "Usage limit reached" } }), { status: 404 }), + ) + .mockResolvedValueOnce(new Response(JSON.stringify({ 
content: "ok" }), { status: 200 })); const mockClient = createMockClient(); const { OpenAIOAuthPlugin } = await import("../index.js"); @@ -4336,15 +4570,473 @@ describe("OpenAIOAuthPlugin runtime toast forwarding", () => { }); expect(response.status).toBe(503); + expect(globalThis.fetch).toHaveBeenCalledTimes(1); expect(markRateLimitedWithReason).toHaveBeenCalledWith( expect.objectContaining({ index: 0 }), - 60_000, + 2 * 60 * 60 * 1000, "gpt-5.1", - "unknown", + expect.any(String), "gpt-5.1", ); }); + it("retries same account on short-cooldown 429 without marking rate-limited", async () => { + const { AccountManager } = await import("../lib/accounts.js"); + const fetchHelpersModule = await import("../lib/request/fetch-helpers.js"); + const rateLimitBackoffModule = await import("../lib/request/rate-limit-backoff.js"); + + const markRateLimitedWithReason = vi.fn(); + const manager = { + getAccountCount: () => 1, + getCurrentOrNextForFamilyHybrid: () => ({ index: 0, accountId: "acc-1", email: "alpha@example.com", refreshToken: "refresh-1" }), + getCurrentOrNextForFamily: () => ({ index: 0, accountId: "acc-1", email: "alpha@example.com", refreshToken: "refresh-1" }), + getCurrentWorkspace: () => null, + getAccountByIndex: () => null, + getAccountsSnapshot: () => [], + isAccountAvailableForFamily: () => true, + toAuthDetails: () => ({ type: "oauth" as const, access: "access-token", refresh: "refresh-1", expires: Date.now() + 60_000 }), + hasRefreshToken: () => true, + saveToDiskDebounced: () => {}, + updateFromAuth: () => {}, + clearAuthFailures: () => {}, + incrementAuthFailures: () => 1, + saveToDisk: async () => {}, + markAccountCoolingDown: () => {}, + markRateLimited: () => {}, + markRateLimitedWithReason, + consumeToken: () => true, + refundToken: () => {}, + syncCodexCliActiveSelectionForIndex: async () => {}, + markSwitched: () => {}, + removeAccount: () => {}, + recordFailure: () => {}, + recordSuccess: () => {}, + recordRateLimit: () => {}, + 
getMinWaitTimeForFamily: () => 0, + shouldShowAccountToast: () => false, + markToastShown: () => {}, + setActiveIndex: () => null, + }; + vi.spyOn(AccountManager, "loadFromDisk").mockResolvedValue(manager as never); + // Short cooldown: 1000ms < 5000ms threshold -> retries same account, does NOT rotate + vi.mocked(fetchHelpersModule.handleErrorResponse).mockResolvedValueOnce({ + response: new Response("rate limited", { status: 429 }), + rateLimit: { retryAfterMs: 1000, code: "rate_limit_exceeded" }, + errorBody: "rate limited", + } as never); + vi.mocked(rateLimitBackoffModule.getRateLimitBackoff).mockReturnValueOnce({ attempt: 1, delayMs: 500 }); + globalThis.fetch = vi + .fn() + .mockResolvedValueOnce(new Response("rate limited", { status: 429 })) + .mockResolvedValueOnce(new Response(JSON.stringify({ content: "ok" }), { status: 200 })); + + const mockClient = createMockClient(); + const { OpenAIOAuthPlugin } = await import("../index.js"); + const plugin = await OpenAIOAuthPlugin({ client: mockClient } as never) as unknown as PluginType; + const sdk = await plugin.auth.loader(getOAuthAuth, { options: {}, models: {} }); + const response = await sdk.fetch!("https://api.openai.com/v1/chat/completions", { + method: "POST", + body: JSON.stringify({ model: "gpt-5.1" }), + }); + + // Short cooldown -> retries same account -> gets 200 on second attempt + expect(response.status).toBe(200); + // markRateLimitedWithReason is for long cooldowns; short retries skip it + expect(markRateLimitedWithReason).not.toHaveBeenCalled(); + expect(globalThis.fetch).toHaveBeenCalledTimes(2); + }); + + it("does not rotate on 404 with unrelated body (not a usage limit)", async () => { + const { AccountManager } = await import("../lib/accounts.js"); + const fetchHelpersModule = await import("../lib/request/fetch-helpers.js"); + + const markRateLimitedWithReason = vi.fn(); + const manager = { + getAccountCount: () => 1, + getCurrentOrNextForFamilyHybrid: () => ({ index: 0, accountId: "acc-1", 
email: "alpha@example.com", refreshToken: "refresh-1" }), + getCurrentOrNextForFamily: () => ({ index: 0, accountId: "acc-1", email: "alpha@example.com", refreshToken: "refresh-1" }), + getCurrentWorkspace: () => null, + getAccountByIndex: () => null, + getAccountsSnapshot: () => [], + isAccountAvailableForFamily: () => true, + toAuthDetails: () => ({ type: "oauth" as const, access: "access-token", refresh: "refresh-1", expires: Date.now() + 60_000 }), + hasRefreshToken: () => true, + saveToDiskDebounced: () => {}, + updateFromAuth: () => {}, + clearAuthFailures: () => {}, + incrementAuthFailures: () => 1, + saveToDisk: async () => {}, + markAccountCoolingDown: () => {}, + markRateLimited: () => {}, + markRateLimitedWithReason, + consumeToken: () => true, + refundToken: () => {}, + syncCodexCliActiveSelectionForIndex: async () => {}, + markSwitched: () => {}, + removeAccount: () => {}, + recordFailure: () => {}, + recordSuccess: () => {}, + recordRateLimit: () => {}, + getMinWaitTimeForFamily: () => 0, + shouldShowAccountToast: () => false, + markToastShown: () => {}, + setActiveIndex: () => null, + }; + vi.spyOn(AccountManager, "loadFromDisk").mockResolvedValue(manager as never); + // 404 with unrelated body: handleErrorResponse returns 404 + no rateLimit + vi.mocked(fetchHelpersModule.handleErrorResponse).mockResolvedValueOnce({ + response: new Response(JSON.stringify({ error: { code: "model_not_found" } }), { status: 404 }), + rateLimit: undefined, + errorBody: JSON.stringify({ error: { code: "model_not_found" } }), + } as never); + globalThis.fetch = vi + .fn() + .mockResolvedValueOnce(new Response(JSON.stringify({ error: { code: "model_not_found" } }), { status: 404 })); + + const mockClient = createMockClient(); + const { OpenAIOAuthPlugin } = await import("../index.js"); + const plugin = await OpenAIOAuthPlugin({ client: mockClient } as never) as unknown as PluginType; + const sdk = await plugin.auth.loader(getOAuthAuth, { options: {}, models: {} }); + const 
response = await sdk.fetch!("https://api.openai.com/v1/chat/completions", { + method: "POST", + body: JSON.stringify({ model: "gpt-5.1" }), + }); + + // Falls through as a plain 404; no rate-limit rotation + expect(response.status).toBe(404); + expect(markRateLimitedWithReason).not.toHaveBeenCalled(); + expect(globalThis.fetch).toHaveBeenCalledTimes(1); + }); + + it("does not mark rate-limited when entitlement 404 is remapped to 403", async () => { + const { AccountManager } = await import("../lib/accounts.js"); + const fetchHelpersModule = await import("../lib/request/fetch-helpers.js"); + + const markRateLimitedWithReason = vi.fn(); + const manager = { + getAccountCount: () => 1, + getCurrentOrNextForFamilyHybrid: () => ({ index: 0, accountId: "acc-1", email: "alpha@example.com", refreshToken: "refresh-1" }), + getCurrentOrNextForFamily: () => ({ index: 0, accountId: "acc-1", email: "alpha@example.com", refreshToken: "refresh-1" }), + getCurrentWorkspace: () => null, + getAccountByIndex: () => null, + getAccountsSnapshot: () => [], + isAccountAvailableForFamily: () => true, + toAuthDetails: () => ({ type: "oauth" as const, access: "access-token", refresh: "refresh-1", expires: Date.now() + 60_000 }), + hasRefreshToken: () => true, + saveToDiskDebounced: () => {}, + updateFromAuth: () => {}, + clearAuthFailures: () => {}, + incrementAuthFailures: () => 1, + saveToDisk: async () => {}, + markAccountCoolingDown: () => {}, + markRateLimited: () => {}, + markRateLimitedWithReason, + consumeToken: () => true, + refundToken: () => {}, + syncCodexCliActiveSelectionForIndex: async () => {}, + markSwitched: () => {}, + removeAccount: () => {}, + recordFailure: () => {}, + recordSuccess: () => {}, + recordRateLimit: () => {}, + getMinWaitTimeForFamily: () => 0, + shouldShowAccountToast: () => false, + markToastShown: () => {}, + setActiveIndex: () => null, + }; + vi.spyOn(AccountManager, "loadFromDisk").mockResolvedValue(manager as never); + // Entitlement error: 
mapUsageLimit404WithBody remaps 404 -> 403 (not 429) + vi.mocked(fetchHelpersModule.handleErrorResponse).mockResolvedValueOnce({ + response: new Response( + JSON.stringify({ error: { code: "usage_not_included", message: "Not included in your plan" } }), + { status: 403 }, + ), + rateLimit: undefined, + errorBody: JSON.stringify({ error: { code: "usage_not_included", message: "Not included in your plan" } }), + } as never); + globalThis.fetch = vi + .fn() + .mockResolvedValueOnce(new Response(JSON.stringify({ error: { code: "usage_not_included" } }), { status: 404 })); + + const mockClient = createMockClient(); + const { OpenAIOAuthPlugin } = await import("../index.js"); + const plugin = await OpenAIOAuthPlugin({ client: mockClient } as never) as unknown as PluginType; + const sdk = await plugin.auth.loader(getOAuthAuth, { options: {}, models: {} }); + const response = await sdk.fetch!("https://api.openai.com/v1/chat/completions", { + method: "POST", + body: JSON.stringify({ model: "gpt-5.1" }), + }); + + // Entitlement 403 passes through; no rate-limit rotation + expect(response.status).toBe(403); + expect(markRateLimitedWithReason).not.toHaveBeenCalled(); + expect(globalThis.fetch).toHaveBeenCalledTimes(1); + }); + + it("rotates account when upstream 404 body contains rate_limit_exceeded keyword", async () => { + const { AccountManager } = await import("../lib/accounts.js"); + const fetchHelpersModule = await import("../lib/request/fetch-helpers.js"); + const rateLimitBackoffModule = await import("../lib/request/rate-limit-backoff.js"); + + const markRateLimitedWithReason = vi.fn(); + const manager = { + getAccountCount: () => 1, + getCurrentOrNextForFamilyHybrid: () => ({ index: 0, accountId: "acc-1", email: "alpha@example.com", refreshToken: "refresh-1" }), + getCurrentOrNextForFamily: () => ({ index: 0, accountId: "acc-1", email: "alpha@example.com", refreshToken: "refresh-1" }), + getCurrentWorkspace: () => null, + getAccountByIndex: () => null, + 
getAccountsSnapshot: () => [], + isAccountAvailableForFamily: () => true, + toAuthDetails: () => ({ type: "oauth" as const, access: "access-token", refresh: "refresh-1", expires: Date.now() + 60_000 }), + hasRefreshToken: () => true, + saveToDiskDebounced: () => {}, + updateFromAuth: () => {}, + clearAuthFailures: () => {}, + incrementAuthFailures: () => 1, + saveToDisk: async () => {}, + markAccountCoolingDown: () => {}, + markRateLimited: () => {}, + markRateLimitedWithReason, + consumeToken: () => true, + refundToken: () => {}, + syncCodexCliActiveSelectionForIndex: async () => {}, + markSwitched: () => {}, + removeAccount: () => {}, + recordFailure: () => {}, + recordSuccess: () => {}, + recordRateLimit: () => {}, + getMinWaitTimeForFamily: () => 0, + shouldShowAccountToast: () => true, + markToastShown: () => {}, + setActiveIndex: () => null, + }; + vi.spyOn(AccountManager, "loadFromDisk").mockResolvedValue(manager as never); + vi.mocked(fetchHelpersModule.handleErrorResponse).mockResolvedValueOnce({ + response: new Response( + JSON.stringify({ error: { code: "rate_limit_exceeded", message: "Rate limit exceeded" } }), + { status: 429 }, + ), + rateLimit: { retryAfterMs: 3 * 60 * 60 * 1000, code: "rate_limit_exceeded" }, + errorBody: JSON.stringify({ error: { code: "rate_limit_exceeded", message: "Rate limit exceeded" } }), + } as never); + vi.mocked(rateLimitBackoffModule.getRateLimitBackoff).mockReturnValueOnce({ attempt: 1, delayMs: 500 }); + globalThis.fetch = vi + .fn() + .mockResolvedValueOnce(new Response(JSON.stringify({ error: { code: "rate_limit_exceeded" } }), { status: 404 })) + .mockResolvedValueOnce(new Response(JSON.stringify({ content: "ok" }), { status: 200 })); + + const mockClient = createMockClient(); + const { OpenAIOAuthPlugin } = await import("../index.js"); + const plugin = await OpenAIOAuthPlugin({ client: mockClient } as never) as unknown as PluginType; + const sdk = await plugin.auth.loader(getOAuthAuth, { options: {}, models: {} }); 
+ const response = await sdk.fetch!("https://api.openai.com/v1/chat/completions", { + method: "POST", + body: JSON.stringify({ model: "gpt-5.1" }), + }); + + expect(response.status).toBe(503); + expect(globalThis.fetch).toHaveBeenCalledTimes(1); + expect(markRateLimitedWithReason).toHaveBeenCalledWith( + expect.objectContaining({ index: 0 }), + 3 * 60 * 60 * 1000, + "gpt-5.1", + expect.any(String), + "gpt-5.1", + ); + }); + + it("falls through to rotation after MAX_SHORT_RETRY_ATTEMPTS local short retries", async () => { + const { AccountManager } = await import("../lib/accounts.js"); + const fetchHelpersModule = await import("../lib/request/fetch-helpers.js"); + const rateLimitBackoffModule = await import("../lib/request/rate-limit-backoff.js"); + + const markRateLimitedWithReason = vi.fn(); + const recordRateLimit = vi.fn(); + const manager = { + getAccountCount: () => 1, + getCurrentOrNextForFamilyHybrid: () => ({ + index: 0, + accountId: "acc-1", + email: "alpha@example.com", + refreshToken: "refresh-1", + }), + getCurrentOrNextForFamily: () => ({ + index: 0, + accountId: "acc-1", + email: "alpha@example.com", + refreshToken: "refresh-1", + }), + getCurrentWorkspace: () => null, + getAccountByIndex: () => null, + getAccountsSnapshot: () => [], + isAccountAvailableForFamily: () => true, + toAuthDetails: () => ({ + type: "oauth" as const, + access: "access-token", + refresh: "refresh-1", + expires: Date.now() + 60_000, + }), + hasRefreshToken: () => true, + saveToDiskDebounced: () => {}, + updateFromAuth: () => {}, + clearAuthFailures: () => {}, + incrementAuthFailures: () => 1, + saveToDisk: async () => {}, + markAccountCoolingDown: () => {}, + markRateLimited: () => {}, + markRateLimitedWithReason, + consumeToken: () => true, + refundToken: () => {}, + syncCodexCliActiveSelectionForIndex: async () => {}, + markSwitched: () => {}, + removeAccount: () => {}, + recordFailure: () => {}, + recordSuccess: () => {}, + recordRateLimit, + getMinWaitTimeForFamily: () => 
0, + shouldShowAccountToast: () => true, + markToastShown: () => {}, + setActiveIndex: () => null, + }; + vi.spyOn(AccountManager, "loadFromDisk").mockResolvedValue(manager as never); + getRateLimitShortRetryThresholdMock.mockReturnValue(1_500); + vi.mocked(fetchHelpersModule.handleErrorResponse).mockResolvedValue({ + response: new Response("rate limited", { status: 429 }), + rateLimit: { + retryAfterMs: 1000, + code: "rate_limit_exceeded", + }, + errorBody: "rate limited", + } as never); + vi.mocked(rateLimitBackoffModule.getRateLimitBackoff).mockReturnValue({ + attempt: 1, + delayMs: 1000, + }); + globalThis.fetch = vi + .fn() + .mockResolvedValueOnce(new Response("rate limited", { status: 429 })) + .mockResolvedValueOnce(new Response("rate limited", { status: 429 })) + .mockResolvedValueOnce(new Response("rate limited", { status: 429 })) + .mockResolvedValueOnce(new Response("rate limited", { status: 429 })); + + const mockClient = createMockClient(); + const { OpenAIOAuthPlugin } = await import("../index.js"); + const plugin = await OpenAIOAuthPlugin({ client: mockClient } as never) as unknown as PluginType; + const sdk = await plugin.auth.loader(getOAuthAuth, { options: {}, models: {} }); + + vi.useFakeTimers(); + const responsePromise = sdk.fetch!("https://api.openai.com/v1/chat/completions", { + method: "POST", + body: JSON.stringify({ model: "gpt-5.1" }), + }); + await vi.advanceTimersByTimeAsync(4_000); + const response = await responsePromise; + + expect(response.status).toBe(503); + expect(globalThis.fetch).toHaveBeenCalledTimes(4); + expect(markRateLimitedWithReason).toHaveBeenCalledWith( + expect.objectContaining({ index: 0 }), + 1000, + "gpt-5.1", + expect.any(String), + "gpt-5.1", + ); + expect(recordRateLimit).toHaveBeenCalled(); + }); + + it("short-retries the same account when attempt is below MAX_SHORT_RETRY_ATTEMPTS", async () => { + const { AccountManager } = await import("../lib/accounts.js"); + const fetchHelpersModule = await 
import("../lib/request/fetch-helpers.js"); + const rateLimitBackoffModule = await import("../lib/request/rate-limit-backoff.js"); + + const markRateLimitedWithReason = vi.fn(); + const manager = { + getAccountCount: () => 1, + getCurrentOrNextForFamilyHybrid: () => ({ + index: 0, + accountId: "acc-1", + email: "alpha@example.com", + refreshToken: "refresh-1", + }), + getCurrentOrNextForFamily: () => ({ + index: 0, + accountId: "acc-1", + email: "alpha@example.com", + refreshToken: "refresh-1", + }), + getCurrentWorkspace: () => null, + getAccountByIndex: () => null, + getAccountsSnapshot: () => [], + isAccountAvailableForFamily: () => true, + toAuthDetails: () => ({ + type: "oauth" as const, + access: "access-token", + refresh: "refresh-1", + expires: Date.now() + 60_000, + }), + hasRefreshToken: () => true, + saveToDiskDebounced: () => {}, + updateFromAuth: () => {}, + clearAuthFailures: () => {}, + incrementAuthFailures: () => 1, + saveToDisk: async () => {}, + markAccountCoolingDown: () => {}, + markRateLimited: () => {}, + markRateLimitedWithReason, + consumeToken: () => true, + refundToken: () => {}, + syncCodexCliActiveSelectionForIndex: async () => {}, + markSwitched: () => {}, + removeAccount: () => {}, + recordFailure: () => {}, + recordSuccess: () => {}, + recordRateLimit: () => {}, + getMinWaitTimeForFamily: () => 0, + shouldShowAccountToast: () => true, + markToastShown: () => {}, + setActiveIndex: () => null, + }; + vi.spyOn(AccountManager, "loadFromDisk").mockResolvedValue(manager as never); + + // First request: 429 with attempt=2 (below MAX_SHORT_RETRY_ATTEMPTS=3) → short retry + // Second request: 200 OK + vi.mocked(fetchHelpersModule.handleErrorResponse).mockResolvedValueOnce({ + response: new Response("rate limited", { status: 429 }), + rateLimit: { + retryAfterMs: 1000, + code: "rate_limit_exceeded", + }, + errorBody: "rate limited", + } as never); + vi.mocked(rateLimitBackoffModule.getRateLimitBackoff).mockReturnValueOnce({ + attempt: 2, + 
delayMs: 1000, + }); + globalThis.fetch = vi + .fn() + .mockResolvedValueOnce(new Response("rate limited", { status: 429 })) + .mockResolvedValueOnce(new Response(JSON.stringify({ content: "ok" }), { status: 200 })); + + const mockClient = createMockClient(); + const { OpenAIOAuthPlugin } = await import("../index.js"); + const plugin = await OpenAIOAuthPlugin({ client: mockClient } as never) as unknown as PluginType; + const sdk = await plugin.auth.loader(getOAuthAuth, { options: {}, models: {} }); + + vi.useFakeTimers(); + const responsePromise = sdk.fetch!("https://api.openai.com/v1/chat/completions", { + method: "POST", + body: JSON.stringify({ model: "gpt-5.1" }), + }); + await vi.advanceTimersByTimeAsync(2000); + const response = await responsePromise; + + // Should have short-retried and succeeded + expect(response.status).toBe(200); + expect(globalThis.fetch).toHaveBeenCalledTimes(2); + // markRateLimitedWithReason should NOT have been called (no rotation) + expect(markRateLimitedWithReason).not.toHaveBeenCalled(); + }); + it("persists the longer parsed rate-limit cooldown across overlapping requests", async () => { const { AccountManager } = await import("../lib/accounts.js"); const { AccountManager: ActualAccountManager } = diff --git a/test/live-sync-entry.test.ts b/test/live-sync-entry.test.ts index 60366734..f2926993 100644 --- a/test/live-sync-entry.test.ts +++ b/test/live-sync-entry.test.ts @@ -17,7 +17,10 @@ describe("live sync entry", () => { } as never, currentSync: null, currentPath: null, + currentConfigKey: null, getLiveAccountSync: () => true, + getLiveAccountSyncDebounceMs: () => 25, + getLiveAccountSyncPollMs: () => 250, getStoragePath: () => "/tmp/accounts.json", createSync: vi.fn(() => ({ stop: vi.fn(), syncToPath: vi.fn() })), registerCleanup: vi.fn(), @@ -30,6 +33,7 @@ describe("live sync entry", () => { expect.objectContaining({ enabled: true, targetPath: "/tmp/accounts.json", + configKey: "25:250", pluginName: "plugin", }), ); diff 
--git a/test/parallel-probe.test.ts b/test/parallel-probe.test.ts index e3fb05a1..ebeb22ec 100644 --- a/test/parallel-probe.test.ts +++ b/test/parallel-probe.test.ts @@ -77,6 +77,19 @@ describe("parallel-probe", () => { expect(result?.error?.message).toBe("network error"); }); + it("normalizes non-Error probe failures for single candidates", async () => { + const account = createMockAccount(0); + const candidates = createProbeCandidates([account]); + + const result = await probeAccountsInParallel(candidates, async () => { + throw "string failure"; + }); + + expect(result?.type).toBe("failure"); + expect(result?.error).toBeInstanceOf(Error); + expect(result?.error?.message).toBe("string failure"); + }); + it("returns first success in parallel probing", async () => { const accounts = [createMockAccount(0), createMockAccount(1), createMockAccount(2)]; const candidates = createProbeCandidates(accounts); @@ -174,6 +187,23 @@ describe("parallel-probe", () => { }); describe("getTopCandidates", () => { + it("accepts named params without overload casts", () => { + const accounts = [createMockAccount(0), createMockAccount(1)]; + const mockManager = { + getAccountsSnapshot: vi.fn().mockReturnValue(accounts), + }; + + const candidates = getTopCandidates({ + accountManager: mockManager as AccountManager, + modelFamily: "codex", + model: null, + maxCandidates: 1, + }); + + expect(candidates).toHaveLength(1); + expect(mockManager.getAccountsSnapshot).toHaveBeenCalledTimes(1); + }); + it("returns empty array when no accounts available", () => { const mockManager = { getAccountsSnapshot: vi.fn().mockReturnValue([]), diff --git a/test/plugin-config.test.ts b/test/plugin-config.test.ts index d30c1f70..61ed8abc 100644 --- a/test/plugin-config.test.ts +++ b/test/plugin-config.test.ts @@ -16,6 +16,10 @@ import { getFallbackToGpt52OnUnsupportedGpt53, getUnsupportedCodexFallbackChain, getFetchTimeoutMs, + getRateLimitDedupWindowMs, + getRateLimitMaxBackoffMs, + 
getRateLimitShortRetryThresholdMs, + getRateLimitStateResetMs, getStreamStallTimeoutMs, getPreemptiveQuotaEnabled, getPreemptiveQuotaRemainingPercent5h, @@ -71,6 +75,10 @@ describe('Plugin Configuration', () => { 'CODEX_AUTH_PREEMPTIVE_QUOTA_5H_REMAINING_PCT', 'CODEX_AUTH_PREEMPTIVE_QUOTA_7D_REMAINING_PCT', 'CODEX_AUTH_PREEMPTIVE_QUOTA_MAX_DEFERRAL_MS', + 'CODEX_AUTH_RATE_LIMIT_DEDUP_WINDOW_MS', + 'CODEX_AUTH_RATE_LIMIT_STATE_RESET_MS', + 'CODEX_AUTH_RATE_LIMIT_MAX_BACKOFF_MS', + 'CODEX_AUTH_RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS', ] as const; const originalEnv: Partial> = {}; @@ -107,9 +115,9 @@ describe('Plugin Configuration', () => { fastSession: false, fastSessionStrategy: 'hybrid', fastSessionMaxInputItems: 30, - retryAllAccountsRateLimited: true, + retryAllAccountsRateLimited: false, retryAllAccountsMaxWaitMs: 0, - retryAllAccountsMaxRetries: Infinity, + retryAllAccountsMaxRetries: 0, unsupportedCodexPolicy: 'strict', fallbackOnUnsupportedCodexModel: false, fallbackToGpt52OnUnsupportedGpt53: true, @@ -124,6 +132,10 @@ describe('Plugin Configuration', () => { parallelProbingMaxConcurrency: 2, emptyResponseMaxRetries: 2, emptyResponseRetryDelayMs: 1_000, + rateLimitDedupWindowMs: 2_000, + rateLimitStateResetMs: 120_000, + rateLimitMaxBackoffMs: 60_000, + rateLimitShortRetryThresholdMs: 5_000, pidOffsetEnabled: false, fetchTimeoutMs: 60_000, streamStallTimeoutMs: 45_000, @@ -167,9 +179,9 @@ describe('Plugin Configuration', () => { fastSession: false, fastSessionStrategy: 'hybrid', fastSessionMaxInputItems: 30, - retryAllAccountsRateLimited: true, + retryAllAccountsRateLimited: false, retryAllAccountsMaxWaitMs: 0, - retryAllAccountsMaxRetries: Infinity, + retryAllAccountsMaxRetries: 0, unsupportedCodexPolicy: 'strict', fallbackOnUnsupportedCodexModel: false, fallbackToGpt52OnUnsupportedGpt53: true, @@ -184,6 +196,10 @@ describe('Plugin Configuration', () => { parallelProbingMaxConcurrency: 2, emptyResponseMaxRetries: 2, emptyResponseRetryDelayMs: 1_000, + 
rateLimitDedupWindowMs: 2_000, + rateLimitStateResetMs: 120_000, + rateLimitMaxBackoffMs: 60_000, + rateLimitShortRetryThresholdMs: 5_000, pidOffsetEnabled: false, fetchTimeoutMs: 60_000, streamStallTimeoutMs: 45_000, @@ -433,9 +449,9 @@ describe('Plugin Configuration', () => { fastSession: false, fastSessionStrategy: 'hybrid', fastSessionMaxInputItems: 30, - retryAllAccountsRateLimited: true, + retryAllAccountsRateLimited: false, retryAllAccountsMaxWaitMs: 0, - retryAllAccountsMaxRetries: Infinity, + retryAllAccountsMaxRetries: 0, unsupportedCodexPolicy: 'strict', fallbackOnUnsupportedCodexModel: false, fallbackToGpt52OnUnsupportedGpt53: true, @@ -450,6 +466,10 @@ describe('Plugin Configuration', () => { parallelProbingMaxConcurrency: 2, emptyResponseMaxRetries: 2, emptyResponseRetryDelayMs: 1_000, + rateLimitDedupWindowMs: 2_000, + rateLimitStateResetMs: 120_000, + rateLimitMaxBackoffMs: 60_000, + rateLimitShortRetryThresholdMs: 5_000, pidOffsetEnabled: false, fetchTimeoutMs: 60_000, streamStallTimeoutMs: 45_000, @@ -499,9 +519,9 @@ describe('Plugin Configuration', () => { fastSession: false, fastSessionStrategy: 'hybrid', fastSessionMaxInputItems: 30, - retryAllAccountsRateLimited: true, + retryAllAccountsRateLimited: false, retryAllAccountsMaxWaitMs: 0, - retryAllAccountsMaxRetries: Infinity, + retryAllAccountsMaxRetries: 0, unsupportedCodexPolicy: 'strict', fallbackOnUnsupportedCodexModel: false, fallbackToGpt52OnUnsupportedGpt53: true, @@ -516,6 +536,10 @@ describe('Plugin Configuration', () => { parallelProbingMaxConcurrency: 2, emptyResponseMaxRetries: 2, emptyResponseRetryDelayMs: 1_000, + rateLimitDedupWindowMs: 2_000, + rateLimitStateResetMs: 120_000, + rateLimitMaxBackoffMs: 60_000, + rateLimitShortRetryThresholdMs: 5_000, pidOffsetEnabled: false, fetchTimeoutMs: 60_000, streamStallTimeoutMs: 45_000, @@ -559,9 +583,9 @@ describe('Plugin Configuration', () => { fastSession: false, fastSessionStrategy: 'hybrid', fastSessionMaxInputItems: 30, - 
retryAllAccountsRateLimited: true, + retryAllAccountsRateLimited: false, retryAllAccountsMaxWaitMs: 0, - retryAllAccountsMaxRetries: Infinity, + retryAllAccountsMaxRetries: 0, unsupportedCodexPolicy: 'strict', fallbackOnUnsupportedCodexModel: false, fallbackToGpt52OnUnsupportedGpt53: true, @@ -576,6 +600,10 @@ describe('Plugin Configuration', () => { parallelProbingMaxConcurrency: 2, emptyResponseMaxRetries: 2, emptyResponseRetryDelayMs: 1_000, + rateLimitDedupWindowMs: 2_000, + rateLimitStateResetMs: 120_000, + rateLimitMaxBackoffMs: 60_000, + rateLimitShortRetryThresholdMs: 5_000, pidOffsetEnabled: false, fetchTimeoutMs: 60_000, streamStallTimeoutMs: 45_000, @@ -1046,6 +1074,30 @@ describe('Plugin Configuration', () => { }); }); + describe('rate-limit backoff settings', () => { + it('uses defaults when no overrides are provided', () => { + expect(getRateLimitDedupWindowMs({})).toBe(2_000); + expect(getRateLimitStateResetMs({})).toBe(120_000); + expect(getRateLimitMaxBackoffMs({})).toBe(60_000); + expect(getRateLimitShortRetryThresholdMs({})).toBe(5_000); + }); + + it('prioritizes environment overrides for backoff settings', () => { + process.env.CODEX_AUTH_RATE_LIMIT_DEDUP_WINDOW_MS = '3000'; + process.env.CODEX_AUTH_RATE_LIMIT_STATE_RESET_MS = '180000'; + process.env.CODEX_AUTH_RATE_LIMIT_MAX_BACKOFF_MS = '90000'; + process.env.CODEX_AUTH_RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS = '7000'; + expect(getRateLimitDedupWindowMs({ rateLimitDedupWindowMs: 1_000 })).toBe(3_000); + expect(getRateLimitStateResetMs({ rateLimitStateResetMs: 60_000 })).toBe(180_000); + expect(getRateLimitMaxBackoffMs({ rateLimitMaxBackoffMs: 30_000 })).toBe(90_000); + expect(getRateLimitShortRetryThresholdMs({ rateLimitShortRetryThresholdMs: 2_000 })).toBe(7_000); + delete process.env.CODEX_AUTH_RATE_LIMIT_DEDUP_WINDOW_MS; + delete process.env.CODEX_AUTH_RATE_LIMIT_STATE_RESET_MS; + delete process.env.CODEX_AUTH_RATE_LIMIT_MAX_BACKOFF_MS; + delete 
process.env.CODEX_AUTH_RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS; + }); + }); + describe('preemptive quota settings', () => { it('should use default thresholds', () => { expect(getPreemptiveQuotaEnabled({})).toBe(true); diff --git a/test/preemptive-quota-scheduler.test.ts b/test/preemptive-quota-scheduler.test.ts index cb6bb61c..fa393753 100644 --- a/test/preemptive-quota-scheduler.test.ts +++ b/test/preemptive-quota-scheduler.test.ts @@ -87,6 +87,21 @@ describe("preemptive quota scheduler", () => { expect(decision.waitMs).toBe(25_000); }); + it("uses the longest active reset window for 429 deferrals", () => { + const scheduler = new PreemptiveQuotaScheduler(); + scheduler.update("acc:model", { + status: 429, + primary: { resetAtMs: 31_000 }, + secondary: { resetAtMs: 61_000 }, + updatedAt: 1_000, + }); + + const decision = scheduler.getDeferral("acc:model", 6_000); + expect(decision.defer).toBe(true); + expect(decision.reason).toBe("rate-limit"); + expect(decision.waitMs).toBe(55_000); + }); + it("preserves secondary near-exhaustion state when marking a quota key rate-limited", () => { const scheduler = new PreemptiveQuotaScheduler({ remainingPercentThresholdSecondary: 5, @@ -141,6 +156,24 @@ describe("preemptive quota scheduler", () => { expect(decision.reason).toBe("quota-near-exhaustion"); }); + it("uses the longest near-exhausted reset window for quota deferrals", () => { + const scheduler = new PreemptiveQuotaScheduler({ + remainingPercentThresholdPrimary: 5, + remainingPercentThresholdSecondary: 5, + }); + scheduler.update("acc:model", { + status: 200, + primary: { usedPercent: 96, resetAtMs: 70_000 }, + secondary: { usedPercent: 97, resetAtMs: 120_000 }, + updatedAt: 10_000, + }); + + const decision = scheduler.getDeferral("acc:model", 20_000); + expect(decision.defer).toBe(true); + expect(decision.reason).toBe("quota-near-exhaustion"); + expect(decision.waitMs).toBe(100_000); + }); + it("prunes expired snapshots", () => { const scheduler = new 
PreemptiveQuotaScheduler(); scheduler.update("a", { diff --git a/test/rate-limit-backoff.test.ts b/test/rate-limit-backoff.test.ts index 107772f8..5b01b0fe 100644 --- a/test/rate-limit-backoff.test.ts +++ b/test/rate-limit-backoff.test.ts @@ -1,8 +1,11 @@ import { describe, it, expect, beforeEach, afterEach, vi } from "vitest"; import { + configureRateLimitBackoff, clearRateLimitBackoffState, getRateLimitBackoff, + getRateLimitShortRetryThresholdMs, resetRateLimitBackoff, + resetRateLimitBackoffConfig, calculateBackoffMs, getRateLimitBackoffWithReason, } from "../lib/request/rate-limit-backoff.js"; @@ -12,10 +15,14 @@ describe("Rate limit backoff", () => { vi.useFakeTimers(); vi.setSystemTime(new Date(0)); clearRateLimitBackoffState(); + vi.spyOn(Math, "random").mockReturnValue(0.5); + resetRateLimitBackoffConfig(); }); afterEach(() => { clearRateLimitBackoffState(); + vi.restoreAllMocks(); + resetRateLimitBackoffConfig(); vi.useRealTimers(); }); @@ -39,6 +46,24 @@ describe("Rate limit backoff", () => { expect(second.isDuplicate).toBe(false); }); + it("applies jitter to new backoff windows but keeps duplicate retries stable", () => { + vi.mocked(Math.random).mockReturnValueOnce(1); + const first = getRateLimitBackoff(4, "jitter-test", 1000); + expect(first.delayMs).toBe(1200); + + vi.setSystemTime(new Date(1000)); + vi.mocked(Math.random).mockReturnValueOnce(0); + const duplicate = getRateLimitBackoff(4, "jitter-test", 1000); + expect(duplicate.delayMs).toBe(1200); + expect(duplicate.isDuplicate).toBe(true); + + vi.setSystemTime(new Date(2500)); + vi.mocked(Math.random).mockReturnValueOnce(0); + const second = getRateLimitBackoff(4, "jitter-test", 1000); + expect(second.delayMs).toBe(1600); + expect(second.isDuplicate).toBe(false); + }); + it("resets after quiet period", () => { getRateLimitBackoff(0, "codex", 1000); vi.setSystemTime(new Date(121_000)); @@ -54,6 +79,73 @@ describe("Rate limit backoff", () => { expect(next.isDuplicate).toBe(false); }); + it("uses 
configurable dedup and state reset windows", () => { + configureRateLimitBackoff({ + dedupWindowMs: 5_000, + stateResetMs: 10_000, + }); + getRateLimitBackoff(0, "codex", 1000); + + vi.setSystemTime(new Date(3_000)); + expect(getRateLimitBackoff(0, "codex", 1000).isDuplicate).toBe(true); + + vi.setSystemTime(new Date(11_000)); + expect(getRateLimitBackoff(0, "codex", 1000).attempt).toBe(1); + }); + + it("does not carry rate-limit state across slot reuse when the stable account key changes", () => { + getRateLimitBackoff(0, "codex", 1000, "acc-1"); + + vi.setSystemTime(new Date(2_500)); + const nextAccount = getRateLimitBackoff(0, "codex", 1000, "acc-2"); + + expect(nextAccount.attempt).toBe(1); + expect(nextAccount.isDuplicate).toBe(false); + }); + + it("keeps concurrent config updates on one complete config profile", async () => { + const profiles = [ + { + dedupWindowMs: 500, + stateResetMs: 3_000, + maxBackoffMs: 7_000, + shortRetryThresholdMs: 1_100, + }, + { + dedupWindowMs: 9_000, + stateResetMs: 20_000, + maxBackoffMs: 17_000, + shortRetryThresholdMs: 9_100, + }, + ] as const; + + await Promise.all( + profiles.map(async (profile) => { + await Promise.resolve(); + configureRateLimitBackoff(profile); + }), + ); + + const activeProfile = profiles.find( + (profile) => + profile.shortRetryThresholdMs === getRateLimitShortRetryThresholdMs(), + ); + + expect(activeProfile).toBeDefined(); + expect(calculateBackoffMs(1000, 20, "quota")).toBe( + activeProfile!.maxBackoffMs, + ); + + const first = getRateLimitBackoff(20, "concurrent", 1000); + expect(first.isDuplicate).toBe(false); + + vi.setSystemTime(new Date(activeProfile!.dedupWindowMs - 1)); + expect(getRateLimitBackoff(20, "concurrent", 1000).isDuplicate).toBe(true); + + vi.setSystemTime(new Date(activeProfile!.stateResetMs + 1)); + expect(getRateLimitBackoff(20, "concurrent", 1000).attempt).toBe(1); + }); + describe("calculateBackoffMs", () => { it("applies quota multiplier (3.0)", () => { const result = 
calculateBackoffMs(1000, 1, "quota"); @@ -96,8 +188,20 @@ describe("Rate limit backoff", () => { it("uses fallback multiplier 1.0 when reason is not in map (line 111 coverage)", () => { const result = calculateBackoffMs(1000, 1, "unknown-reason" as never); - expect(result).toBe(1000); + expect(result).toBe(1000); }); + + it("uses configurable max backoff cap", () => { + configureRateLimitBackoff({ maxBackoffMs: 12_000 }); + const result = calculateBackoffMs(1000, 20, "quota"); + expect(result).toBe(12_000); + }); + }); + + it("exposes configurable short retry threshold", () => { + expect(getRateLimitShortRetryThresholdMs()).toBe(5_000); + configureRateLimitBackoff({ shortRetryThresholdMs: 9_000 }); + expect(getRateLimitShortRetryThresholdMs()).toBe(9_000); }); describe("normalizeDelayMs edge cases (line 32 coverage)", () => { diff --git a/test/repair-commands.test.ts b/test/repair-commands.test.ts index 0853aee7..bb726228 100644 --- a/test/repair-commands.test.ts +++ b/test/repair-commands.test.ts @@ -751,6 +751,25 @@ describe("repair-commands direct deps coverage", () => { ); }); + it("runDoctor marks malformed codex auth payloads as invalid instead of healthy", async () => { + existsSyncMock.mockImplementation((path) => path === "/mock/auth.json"); + readFileMock.mockResolvedValueOnce("[]"); + const consoleSpy = vi.spyOn(console, "log").mockImplementation(() => {}); + + const exitCode = await runDoctor(["--json"], createDeps()); + + expect(exitCode).toBe(1); + expect( + JSON.parse(String(consoleSpy.mock.calls.at(-1)?.[0] ?? 
"{}")).checks, + ).toContainEqual( + expect.objectContaining({ + key: "codex-auth-readable", + severity: "error", + message: "Codex auth file has invalid structure", + }), + ); + }); + it("runDoctor derives auto-fix state from the final action set", async () => { const now = Date.now(); let persistedAccountStorage: unknown; diff --git a/test/request-attempt-budget.test.ts b/test/request-attempt-budget.test.ts new file mode 100644 index 00000000..c4e86399 --- /dev/null +++ b/test/request-attempt-budget.test.ts @@ -0,0 +1,42 @@ +import { describe, expect, it } from "vitest"; +import { + buildStreamFailoverCandidateOrder, + capStreamFailoverMax, + computeOutboundRequestAttemptBudget, +} from "../lib/request/request-attempt-budget.js"; + +describe("request attempt budget", () => { + it("caps stream failover to a single retry", () => { + expect(capStreamFailoverMax(0)).toBe(0); + expect(capStreamFailoverMax(1)).toBe(1); + expect(capStreamFailoverMax(3)).toBe(1); + }); + + it("caps outbound request budgets for large account pools", () => { + expect( + computeOutboundRequestAttemptBudget({ + accountCount: 12, + maxSameAccountRetries: 2, + emptyResponseMaxRetries: 2, + streamFailoverMax: 3, + }), + ).toBe(6); + }); + + it("sanitizes non-finite retry inputs to deterministic defaults", () => { + expect( + computeOutboundRequestAttemptBudget({ + accountCount: Number.NaN, + maxSameAccountRetries: Number.POSITIVE_INFINITY, + emptyResponseMaxRetries: Number.NEGATIVE_INFINITY, + streamFailoverMax: Number.NaN, + }), + ).toBe(1); + }); + + it("keeps the primary stream account plus at most one alternate", () => { + expect( + buildStreamFailoverCandidateOrder(2, [2, 5, 2, 7, 9]), + ).toEqual([2, 5]); + }); +}); diff --git a/test/request-resilience.test.ts b/test/request-resilience.test.ts new file mode 100644 index 00000000..4de12c0e --- /dev/null +++ b/test/request-resilience.test.ts @@ -0,0 +1,79 @@ +import { describe, expect, it } from "vitest"; +import { + 
armPoolExhaustionCooldown, + buildAdaptiveStreamFailoverCandidateOrder, + clearPoolExhaustionCooldown, + clearServerBurstCooldown, + getPoolExhaustionCooldownRemaining, + getServerBurstCooldownRemaining, + recordServerBurstFailure, + resetRequestResilienceStateForTests, +} from "../lib/request/request-resilience.js"; + +describe("request resilience helpers", () => { + it("arms and clears the pool exhaustion cooldown", () => { + resetRequestResilienceStateForTests(); + const now = Date.parse("2026-04-06T00:00:00.000Z"); + armPoolExhaustionCooldown(5_000, now); + expect(getPoolExhaustionCooldownRemaining(now + 1_000)).toBe(14_000); + clearPoolExhaustionCooldown(); + expect(getPoolExhaustionCooldownRemaining(now + 1_000)).toBe(0); + }); + + it("keeps pool exhaustion cooldown monotonic when re-armed with a shorter wait", () => { + resetRequestResilienceStateForTests(); + const now = Date.parse("2026-04-06T00:00:00.000Z"); + const first = armPoolExhaustionCooldown(30_000, now); + const second = armPoolExhaustionCooldown(5_000, now + 1_000); + + expect(second).toBe(first); + expect(getPoolExhaustionCooldownRemaining(now + 2_000)).toBe(28_000); + }); + + it("arms a short server burst cooldown after repeated multi-account 5xx failures", () => { + resetRequestResilienceStateForTests(); + const now = Date.parse("2026-04-06T00:00:00.000Z"); + expect(recordServerBurstFailure(0, now)).toBe(0); + expect(recordServerBurstFailure(1, now + 500)).toBe(0); + const cooldownUntil = recordServerBurstFailure(2, now + 1_000); + expect(cooldownUntil).toBeGreaterThan(now + 1_000); + expect(getServerBurstCooldownRemaining(now + 2_000)).toBeGreaterThan(0); + clearServerBurstCooldown(); + expect(getServerBurstCooldownRemaining(now + 2_000)).toBe(0); + }); + + it("keeps an armed server burst cooldown active across later failures", () => { + resetRequestResilienceStateForTests(); + const now = Date.parse("2026-04-06T00:00:00.000Z"); + expect(recordServerBurstFailure(0, now)).toBe(0); + 
expect(recordServerBurstFailure(1, now + 5_000)).toBe(0); + const cooldownUntil = recordServerBurstFailure(2, now + 9_000); + expect(cooldownUntil).toBeGreaterThan(now + 9_000); + + const laterFailure = recordServerBurstFailure(3, now + 12_000); + expect(laterFailure).toBe(cooldownUntil); + expect(getServerBurstCooldownRemaining(now + 12_500)).toBeGreaterThan(0); + }); + + it("prefers the freshest eligible alternate account for stream failover", () => { + const now = Date.parse("2026-04-06T00:00:00.000Z"); + expect( + buildAdaptiveStreamFailoverCandidateOrder( + 0, + [ + { index: 0, lastUsed: now - 5_000, enabled: true, rateLimitResetTimes: {} }, + { index: 1, lastUsed: now - 20_000, enabled: true, rateLimitResetTimes: {} }, + { index: 2, lastUsed: now - 1_000, enabled: true, rateLimitResetTimes: {} }, + { + index: 3, + lastUsed: now, + enabled: true, + coolingDownUntil: now + 10_000, + rateLimitResetTimes: {}, + }, + ], + now, + ), + ).toEqual([0, 2]); + }); +}); diff --git a/test/response-metadata.test.ts b/test/response-metadata.test.ts index 36853f7e..410afd10 100644 --- a/test/response-metadata.test.ts +++ b/test/response-metadata.test.ts @@ -23,10 +23,10 @@ describe("response metadata helpers", () => { expect(parseRetryAfterHintMs(headers)).toBe(1200); }); - it("parses retry-after seconds and caps large values", () => { + it("parses retry-after seconds and caps extreme values to one day", () => { const headers = new Headers({ "retry-after": "999999" }); - expect(parseRetryAfterHintMs(headers)).toBe(300000); + expect(parseRetryAfterHintMs(headers)).toBe(86_400_000); }); it("parses retry-after dates and x-ratelimit-reset timestamps", () => { diff --git a/test/runtime-live-sync.test.ts b/test/runtime-live-sync.test.ts index d9861918..19b6f0f7 100644 --- a/test/runtime-live-sync.test.ts +++ b/test/runtime-live-sync.test.ts @@ -21,6 +21,7 @@ describe("runtime live sync", () => { let committedState = { sync: overrides.currentSync ?? 
null, path: overrides.currentPath ?? null, + configKey: null as string | null, cleanupRegistered: overrides.currentCleanupRegistered ?? false, }; let cleanupCallback: (() => void) | null = null; @@ -92,12 +93,14 @@ describe("runtime live sync", () => { await expect(ensureRuntimeLiveAccountSync(deps)).resolves.toEqual({ sync: null, path: null, + configKey: null, cleanupRegistered: true, }); expect(currentSync.stop).toHaveBeenCalledTimes(1); expect(deps.commitState).toHaveBeenCalledWith({ sync: null, path: null, + configKey: null, cleanupRegistered: true, }); }); @@ -111,6 +114,7 @@ describe("runtime live sync", () => { expect(createSync).toHaveBeenCalledTimes(1); expect(registerCleanup).toHaveBeenCalledTimes(1); expect(first.path).toBe("C:\\repo\\accounts.json"); + expect(first.configKey).toBe("25:250"); expect(first.cleanupRegistered).toBe(true); expect(first.sync?.syncToPath).toHaveBeenCalledWith( "C:\\repo\\accounts.json", @@ -120,6 +124,7 @@ describe("runtime live sync", () => { ...deps, currentSync: first.sync, currentPath: first.path, + currentConfigKey: first.configKey, currentCleanupRegistered: first.cleanupRegistered, }); @@ -151,6 +156,7 @@ describe("runtime live sync", () => { await expect(pending).resolves.toMatchObject({ sync: currentSync, path: "C:\\repo\\new.json", + configKey: null, cleanupRegistered: true, }); expect(currentSync.syncToPath).toHaveBeenCalledTimes(3); @@ -176,6 +182,7 @@ describe("runtime live sync", () => { await expect(pending).resolves.toMatchObject({ sync: currentSync, path: "C:\\repo\\old.json", + configKey: null, cleanupRegistered: true, }); expect(currentSync.syncToPath).toHaveBeenCalledTimes(3); @@ -213,6 +220,7 @@ describe("runtime live sync", () => { const committed = getCommittedState(); expect(committed.sync).toBe(createdSync); expect(committed.path).toBeNull(); + expect(committed.configKey).toBe("25:250"); expect(committed.cleanupRegistered).toBe(true); const cleanup = getCleanupCallback(); @@ -238,12 +246,14 @@ 
describe("runtime live sync", () => { const committed = getCommittedState(); expect(committed.sync).toBe(createdSync); + expect(committed.configKey).toBe("25:250"); expect(committed.cleanupRegistered).toBe(true); const second = ensureRuntimeLiveAccountSync({ ...deps, currentSync: committed.sync, currentPath: committed.path, + currentConfigKey: committed.configKey, currentCleanupRegistered: committed.cleanupRegistered, }); await vi.runAllTicks(); @@ -253,11 +263,13 @@ describe("runtime live sync", () => { await expect(pending).resolves.toMatchObject({ sync: createdSync, path: "C:\\repo\\accounts.json", + configKey: "25:250", cleanupRegistered: true, }); await expect(second).resolves.toMatchObject({ sync: createdSync, path: "C:\\repo\\accounts.json", + configKey: "25:250", cleanupRegistered: true, }); }); @@ -273,6 +285,7 @@ describe("runtime live sync", () => { ...deps, currentSync: first.sync, currentPath: first.path, + currentConfigKey: first.configKey, currentCleanupRegistered: first.cleanupRegistered, getLiveAccountSync: vi.fn().mockReturnValue(false), }); @@ -282,6 +295,7 @@ describe("runtime live sync", () => { ...deps, currentSync: disabled.sync, currentPath: disabled.path, + currentConfigKey: disabled.configKey, currentCleanupRegistered: disabled.cleanupRegistered, }); setLiveSync(reenabled.sync); @@ -293,4 +307,36 @@ describe("runtime live sync", () => { expect((reenabled.sync as { stop: ReturnType }).stop).toHaveBeenCalledTimes(1); expect((first.sync as { stop: ReturnType }).stop).toHaveBeenCalledTimes(1); }); + + it("recreates live sync when debounce/poll settings change", async () => { + const firstSync = { + stop: vi.fn(), + syncToPath: vi.fn().mockResolvedValue(undefined), + }; + const secondSync = { + stop: vi.fn(), + syncToPath: vi.fn().mockResolvedValue(undefined), + }; + const { deps, createSync } = createDeps({ + currentSync: firstSync, + currentPath: "C:\\repo\\accounts.json", + currentCleanupRegistered: true, + }); + 
createSync.mockReturnValue(secondSync); + deps.getLiveAccountSyncDebounceMs = vi.fn().mockReturnValue(50); + deps.getLiveAccountSyncPollMs = vi.fn().mockReturnValue(500); + + const result = await ensureRuntimeLiveAccountSync({ + ...deps, + currentSync: firstSync, + currentPath: "C:\\repo\\accounts.json", + currentConfigKey: "25:250", + }); + + expect(firstSync.stop).toHaveBeenCalledTimes(1); + expect(createSync).toHaveBeenCalledTimes(1); + expect(result.sync).toBe(secondSync); + expect(result.configKey).toBe("50:500"); + expect(secondSync.syncToPath).toHaveBeenCalledWith("C:\\repo\\accounts.json"); + }); }); diff --git a/test/runtime-metrics.test.ts b/test/runtime-metrics.test.ts deleted file mode 100644 index f4fbb44c..00000000 --- a/test/runtime-metrics.test.ts +++ /dev/null @@ -1,88 +0,0 @@ -import { describe, expect, it } from "vitest"; -import { - MAX_RETRY_HINT_MS, - clampRetryHintMs, - createRuntimeMetrics, - parseEnvInt, - parseFailoverMode, - parseRetryAfterHintMs, - sanitizeResponseHeadersForLog, -} from "../lib/runtime/metrics.js"; - -describe("runtime metrics helpers", () => { - it("creates zeroed runtime metrics from an injected timestamp", () => { - expect(createRuntimeMetrics(1234)).toEqual({ - startedAt: 1234, - totalRequests: 0, - successfulRequests: 0, - failedRequests: 0, - rateLimitedResponses: 0, - serverErrors: 0, - networkErrors: 0, - userAborts: 0, - authRefreshFailures: 0, - emptyResponseRetries: 0, - accountRotations: 0, - sameAccountRetries: 0, - streamFailoverAttempts: 0, - streamFailoverRecoveries: 0, - streamFailoverCrossAccountRecoveries: 0, - cumulativeLatencyMs: 0, - lastRequestAt: null, - lastError: null, - }); - }); - - it("parses failover modes and integer env overrides conservatively", () => { - expect(parseFailoverMode("aggressive")).toBe("aggressive"); - expect(parseFailoverMode(" conservative ")).toBe("conservative"); - expect(parseFailoverMode("other")).toBe("balanced"); - expect(parseEnvInt("42")).toBe(42); - 
expect(parseEnvInt("abc")).toBeUndefined(); - expect(parseEnvInt(undefined)).toBeUndefined(); - }); - - it("clamps retry hints and drops invalid values", () => { - expect(clampRetryHintMs(-1)).toBeNull(); - expect(clampRetryHintMs(Number.NaN)).toBeNull(); - expect(clampRetryHintMs(MAX_RETRY_HINT_MS + 1000)).toBe(MAX_RETRY_HINT_MS); - expect(clampRetryHintMs(2500.9)).toBe(2500); - }); - - it("parses retry-after headers across ms, seconds, date, and reset formats", () => { - const now = Date.parse("2026-03-22T00:00:00.000Z"); - - const retryAfterMsHeaders = new Headers({ "retry-after-ms": "1500" }); - expect(parseRetryAfterHintMs(retryAfterMsHeaders, now)).toBe(1500); - - const retryAfterSecondsHeaders = new Headers({ "retry-after": "3" }); - expect(parseRetryAfterHintMs(retryAfterSecondsHeaders, now)).toBe(3000); - - const retryAfterDateHeaders = new Headers({ - "retry-after": "Sun, 22 Mar 2026 00:00:04 GMT", - }); - expect(parseRetryAfterHintMs(retryAfterDateHeaders, now)).toBe(4000); - - const resetSecondsHeaders = new Headers({ "x-ratelimit-reset": "1774137605" }); - expect(parseRetryAfterHintMs(resetSecondsHeaders, now)).toBe(5000); - - const resetMillisecondsHeaders = new Headers({ - "x-ratelimit-reset": String(now + 6000), - }); - expect(parseRetryAfterHintMs(resetMillisecondsHeaders, now)).toBe(6000); - }); - - it("keeps only allowlisted response headers for logging", () => { - const headers = new Headers({ - "content-type": "text/event-stream", - "x-request-id": "req_123", - authorization: "secret", - cookie: "sensitive", - }); - - expect(sanitizeResponseHeadersForLog(headers)).toEqual({ - "content-type": "text/event-stream", - "x-request-id": "req_123", - }); - }); -}); diff --git a/test/runtime-observability.test.ts b/test/runtime-observability.test.ts new file mode 100644 index 00000000..924e032a --- /dev/null +++ b/test/runtime-observability.test.ts @@ -0,0 +1,147 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +const 
readFileMock = vi.fn(); +const readFileSyncMock = vi.fn(); +const writeFileMock = vi.fn(async () => undefined); +const renameMock = vi.fn(async () => undefined); +const unlinkMock = vi.fn(async () => undefined); +const mkdirMock = vi.fn(async () => undefined); +vi.mock("node:fs", () => ({ + existsSync: vi.fn(() => true), + readFileSync: readFileSyncMock, + promises: { + readFile: readFileMock, + writeFile: writeFileMock, + rename: renameMock, + unlink: unlinkMock, + mkdir: mkdirMock, + }, +})); + +vi.mock("../lib/runtime-paths.js", () => ({ + getCodexMultiAuthDir: () => "/mock/.codex/multi-auth", +})); + +describe("runtime observability snapshot versioning", () => { + const originalVitestEnv = process.env.VITEST; + + beforeEach(() => { + vi.resetModules(); + process.env.VITEST = originalVitestEnv; + }); + + afterEach(() => { + readFileMock.mockReset(); + readFileSyncMock.mockReset(); + writeFileMock.mockReset(); + renameMock.mockReset(); + unlinkMock.mockReset(); + mkdirMock.mockReset(); + if (originalVitestEnv === undefined) { + delete process.env.VITEST; + } else { + process.env.VITEST = originalVitestEnv; + } + }); + + it("normalizes legacy unversioned snapshots", async () => { + readFileMock.mockResolvedValueOnce( + JSON.stringify({ + updatedAt: 1, + responsesRequests: 2, + runtimeMetrics: { totalRequests: 3 }, + }), + ); + + const { loadPersistedRuntimeObservabilitySnapshot } = await import( + "../lib/runtime/runtime-observability.js" + ); + const snapshot = await loadPersistedRuntimeObservabilitySnapshot(); + + expect(snapshot?.version).toBe(1); + expect(snapshot?.responsesRequests).toBe(2); + expect(snapshot?.runtimeMetrics.totalRequests).toBe(3); + expect(snapshot?.runtimeMetrics.failedRequests).toBe(0); + }); + + it("drops unknown future snapshot versions safely", async () => { + readFileMock.mockResolvedValueOnce(JSON.stringify({ version: 99 })); + + const { loadPersistedRuntimeObservabilitySnapshot } = await import( + 
"../lib/runtime/runtime-observability.js" + ); + const snapshot = await loadPersistedRuntimeObservabilitySnapshot(); + + expect(snapshot).toBeNull(); + }); + + it("retries transient rename contention when persisting a snapshot", async () => { + process.env.VITEST = ""; + let attempts = 0; + renameMock.mockImplementation(async () => { + attempts += 1; + if (attempts === 1) { + throw Object.assign(new Error("busy"), { code: "EBUSY" }); + } + }); + + const mod = await import("../lib/runtime/runtime-observability.js"); + mod.mutateRuntimeObservabilitySnapshot((snapshot) => { + snapshot.responsesRequests = 3; + }); + + await vi.waitFor(() => { + expect(renameMock).toHaveBeenCalledTimes(2); + }); + expect(unlinkMock).toHaveBeenCalled(); + }); + + it("contains permanent snapshot write failures without leaving pending writes rejected", async () => { + process.env.VITEST = ""; + renameMock.mockImplementation(async () => { + throw Object.assign(new Error("disk full"), { code: "EIO" }); + }); + + const mod = await import("../lib/runtime/runtime-observability.js"); + mod.mutateRuntimeObservabilitySnapshot((snapshot) => { + snapshot.responsesRequests = 1; + }); + + await vi.waitFor(() => { + expect(renameMock).toHaveBeenCalled(); + }); + + renameMock.mockReset(); + renameMock.mockResolvedValue(undefined); + mod.mutateRuntimeObservabilitySnapshot((snapshot) => { + snapshot.responsesRequests = 2; + }); + + await vi.waitFor(() => { + expect(renameMock).toHaveBeenCalled(); + }); + }); + + it("seeds the first in-memory snapshot from disk before mutating", async () => { + process.env.VITEST = ""; + readFileSyncMock.mockReturnValue( + JSON.stringify({ + version: 1, + authRefreshRequests: 7, + poolExhaustionCooldownUntil: 12345, + runtimeMetrics: { failedRequests: 4 }, + }), + ); + + const mod = await import("../lib/runtime/runtime-observability.js"); + mod.mutateRuntimeObservabilitySnapshot((snapshot) => { + snapshot.responsesRequests = 9; + }); + + const snapshot = 
mod.getRuntimeObservabilitySnapshot(); + expect(snapshot.responsesRequests).toBe(9); + expect(snapshot.authRefreshRequests).toBe(7); + expect(snapshot.poolExhaustionCooldownUntil).toBe(12345); + expect(snapshot.runtimeMetrics.failedRequests).toBe(4); + }); +}); diff --git a/test/runtime-services.test.ts b/test/runtime-services.test.ts index 4cfe1101..2de43cce 100644 --- a/test/runtime-services.test.ts +++ b/test/runtime-services.test.ts @@ -13,6 +13,7 @@ describe("runtime services helpers", () => { targetPath: "/tmp/a", currentSync: { stop, syncToPath: vi.fn() }, currentPath: "/tmp/old", + currentConfigKey: "old", createSync: vi.fn(), registerCleanup: vi.fn(), logWarn: vi.fn(), @@ -23,6 +24,27 @@ describe("runtime services helpers", () => { expect(result).toEqual({ liveAccountSync: null, liveAccountSyncPath: null, + liveAccountSyncConfigKey: null, + }); + }); + + it("clears stale live sync path and config key even when no watcher exists", async () => { + const result = await ensureLiveAccountSyncState({ + enabled: false, + targetPath: "/tmp/a", + currentSync: null, + currentPath: "/tmp/stale", + currentConfigKey: "stale", + createSync: vi.fn(), + registerCleanup: vi.fn(), + logWarn: vi.fn(), + pluginName: "plugin", + }); + + expect(result).toEqual({ + liveAccountSync: null, + liveAccountSyncPath: null, + liveAccountSyncConfigKey: null, }); }); @@ -34,6 +56,7 @@ describe("runtime services helpers", () => { targetPath: "/tmp/a", currentSync: null, currentPath: null, + configKey: "25:250", createSync: vi.fn(() => created), registerCleanup: vi.fn(), logWarn: vi.fn(), @@ -43,6 +66,7 @@ describe("runtime services helpers", () => { expect(syncToPath).toHaveBeenCalledWith("/tmp/a"); expect(result.liveAccountSync).toBe(created); expect(result.liveAccountSyncPath).toBe("/tmp/a"); + expect(result.liveAccountSyncConfigKey).toBe("25:250"); }); it("warns and keeps the previous path when busy retries are exhausted", async () => { @@ -61,6 +85,7 @@ describe("runtime services 
helpers", () => { targetPath: "/tmp/new", currentSync, currentPath: "/tmp/old", + currentConfigKey: "old", createSync: vi.fn(), registerCleanup: vi.fn(), logWarn, @@ -77,12 +102,82 @@ describe("runtime services helpers", () => { expect(result).toEqual({ liveAccountSync: currentSync, liveAccountSyncPath: "/tmp/old", + liveAccountSyncConfigKey: "old", }); } finally { vi.useRealTimers(); } }); + it("recreates live sync when config key changes", async () => { + const oldSync = { stop: vi.fn(), syncToPath: vi.fn() }; + const newSync = { stop: vi.fn(), syncToPath: vi.fn().mockResolvedValue(undefined) }; + const createSync = vi.fn(() => newSync); + + const result = await ensureLiveAccountSyncState({ + enabled: true, + targetPath: "/tmp/a", + currentSync: oldSync, + currentPath: "/tmp/a", + currentConfigKey: "25:250", + configKey: "50:500", + createSync, + registerCleanup: vi.fn(), + logWarn: vi.fn(), + pluginName: "plugin", + }); + + expect(oldSync.stop).toHaveBeenCalledTimes(1); + expect(createSync).toHaveBeenCalledTimes(1); + expect(newSync.syncToPath).toHaveBeenCalledWith("/tmp/a"); + expect(result.liveAccountSync).toBe(newSync); + expect(result.liveAccountSyncConfigKey).toBe("50:500"); + }); + + it("recreates live sync when the current config key is unknown", async () => { + const oldSync = { stop: vi.fn(), syncToPath: vi.fn() }; + const newSync = { stop: vi.fn(), syncToPath: vi.fn().mockResolvedValue(undefined) }; + const createSync = vi.fn(() => newSync); + + const result = await ensureLiveAccountSyncState({ + enabled: true, + targetPath: "/tmp/a", + currentSync: oldSync, + currentPath: "/tmp/a", + currentConfigKey: null, + configKey: "50:500", + createSync, + registerCleanup: vi.fn(), + logWarn: vi.fn(), + pluginName: "plugin", + }); + + expect(oldSync.stop).toHaveBeenCalledTimes(1); + expect(createSync).toHaveBeenCalledTimes(1); + expect(result.liveAccountSync).toBe(newSync); + expect(result.liveAccountSyncConfigKey).toBe("50:500"); + }); + + it("keeps the 
existing watcher when configKey is undefined", async () => { + const currentSync = { stop: vi.fn(), syncToPath: vi.fn().mockResolvedValue(undefined) }; + const result = await ensureLiveAccountSyncState({ + enabled: true, + targetPath: "/tmp/a", + currentSync, + currentPath: "/tmp/a", + currentConfigKey: "25:250", + configKey: undefined, + createSync: vi.fn(), + registerCleanup: vi.fn(), + logWarn: vi.fn(), + pluginName: "plugin", + }); + + expect(currentSync.stop).not.toHaveBeenCalled(); + expect(result.liveAccountSync).toBe(currentSync); + expect(result.liveAccountSyncConfigKey).toBe("25:250"); + }); + it("recreates refresh guardian when config changes and clears when disabled", () => { const oldGuardian = { stop: vi.fn(), start: vi.fn() }; const createGuardian = vi.fn(() => ({ stop: vi.fn(), start: vi.fn() })); diff --git a/test/schemas.test.ts b/test/schemas.test.ts index 16cd2f97..a5b6d8b9 100644 --- a/test/schemas.test.ts +++ b/test/schemas.test.ts @@ -42,6 +42,10 @@ describe("PluginConfigSchema", () => { perProjectAccounts: true, sessionRecovery: true, autoResume: false, + rateLimitDedupWindowMs: 2000, + rateLimitStateResetMs: 120000, + rateLimitMaxBackoffMs: 60000, + rateLimitShortRetryThresholdMs: 5000, fetchTimeoutMs: 60000, streamStallTimeoutMs: 45000, liveAccountSync: true, @@ -65,6 +69,8 @@ describe("PluginConfigSchema", () => { }); it.each([ + ["rateLimitStateResetMs", 999, 1000], + ["rateLimitMaxBackoffMs", 999, 1000], ["liveAccountSyncDebounceMs", 49, 50], ["liveAccountSyncPollMs", 499, 500], ["sessionAffinityTtlMs", 999, 1000], @@ -80,6 +86,8 @@ describe("PluginConfigSchema", () => { }); it.each([ + ["rateLimitDedupWindowMs", -1, 0], + ["rateLimitShortRetryThresholdMs", -1, 0], ["networkErrorCooldownMs", -1, 0], ["serverErrorCooldownMs", -1, 0], ] as const)("allows zero and rejects negatives for %s", (key, invalidValue, validValue) => { @@ -109,6 +117,10 @@ describe("PluginConfigSchema", () => { "sessionAffinityMaxEntries", 
"proactiveRefreshIntervalMs", "proactiveRefreshBufferMs", + "rateLimitDedupWindowMs", + "rateLimitStateResetMs", + "rateLimitMaxBackoffMs", + "rateLimitShortRetryThresholdMs", "networkErrorCooldownMs", "serverErrorCooldownMs", "preemptiveQuotaRemainingPercent5h", diff --git a/test/session-affinity.test.ts b/test/session-affinity.test.ts index 1bf1ae73..a5b0c6d6 100644 --- a/test/session-affinity.test.ts +++ b/test/session-affinity.test.ts @@ -1,4 +1,4 @@ -import { describe, expect, it } from "vitest"; +import { describe, expect, it, vi } from "vitest"; import { SessionAffinityStore } from "../lib/session-affinity.js"; describe("SessionAffinityStore", () => { @@ -143,4 +143,32 @@ describe("SessionAffinityStore", () => { expect(store.getLastResponseId("session-a", 3_500)).toBe("resp_123"); expect(store.getPreferredAccountIndex("session-a", 3_500)).toBe(2); }); + + it("ignores stale response-id writes from older overlapping requests", () => { + const store = new SessionAffinityStore({ ttlMs: 10_000, maxEntries: 4 }); + store.rememberWithVersion("session-a", 1, 1_000, 1); + store.updateLastResponseId("session-a", "resp_first", 2_000, 1); + store.rememberWithVersion("session-a", 2, 3_000, 2); + store.updateLastResponseId("session-a", "resp_second", 4_000, 2); + + store.rememberWithVersion("session-a", 1, 5_000, 1); + store.updateLastResponseId("session-a", "resp_stale", 5_000, 1); + + expect(store.getPreferredAccountIndex("session-a", 5_500)).toBe(2); + expect(store.getLastResponseId("session-a", 5_500)).toBe("resp_second"); + }); + + it("generates distinct default write versions for same-timestamp overlaps", () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date("2026-04-06T00:00:00.000Z")); + try { + const store = new SessionAffinityStore({ ttlMs: 10_000, maxEntries: 4 }); + store.rememberWithVersion("session-a", 0, 1_000); + store.rememberWithVersion("session-a", 1, 1_000); + + expect(store.getPreferredAccountIndex("session-a", 1_500)).toBe(1); + } finally { + 
vi.useRealTimers(); + } + }); }); diff --git a/test/storage-health-inspection.test.ts b/test/storage-health-inspection.test.ts new file mode 100644 index 00000000..8eec9c4f --- /dev/null +++ b/test/storage-health-inspection.test.ts @@ -0,0 +1,150 @@ +import { createHash } from "node:crypto"; +import { existsSync, promises as fs } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, describe, expect, it, vi } from "vitest"; + +function sha256(value: string): string { + return createHash("sha256").update(value).digest("hex"); +} + +describe("inspectStorageHealth", () => { + async function withStorageModule( + testName: string, + runner: (args: { workDir: string; storagePath: string; resetMarkerPath: string; walPath: string; storageModule: typeof import("../lib/storage.js") }) => Promise, + ): Promise { + const workDir = join(tmpdir(), `${testName}-${Date.now()}`); + await fs.mkdir(workDir, { recursive: true }); + const storagePath = join(workDir, "accounts.json"); + const resetMarkerPath = `${storagePath}.reset-intent`; + const walPath = `${storagePath}.wal`; + const storageModule = await import("../lib/storage.js"); + storageModule.setStoragePathDirect(storagePath); + try { + return await runner({ workDir, storagePath, resetMarkerPath, walPath, storageModule }); + } finally { + storageModule.setStoragePathDirect(null); + await fs.rm(workDir, { recursive: true, force: true }); + } + } + + afterEach(() => { + vi.resetModules(); + }); + + it("keeps WAL inspection read-only and silent", async () => { + const logWarn = vi.fn(); + const logInfo = vi.fn(); + vi.doMock("../lib/logger.js", () => ({ + createLogger: () => ({ + warn: logWarn, + info: logInfo, + debug: vi.fn(), + error: vi.fn(), + }), + })); + + const workDir = join(tmpdir(), `storage-health-${Date.now()}`); + await fs.mkdir(workDir, { recursive: true }); + const storagePath = join(workDir, "accounts.json"); + const walPath = `${storagePath}.wal`; + + const 
content = JSON.stringify({ + version: 3, + activeIndex: 0, + activeIndexByFamily: { codex: 0 }, + accounts: [ + { + refreshToken: "refresh-token", + accountId: "acc-1", + addedAt: 1, + lastUsed: 1, + }, + ], + }); + await fs.writeFile( + walPath, + JSON.stringify({ version: 1, content, checksum: sha256(content) }), + "utf-8", + ); + + const storageModule = await import("../lib/storage.js"); + storageModule.setStoragePathDirect(storagePath); + + try { + const summary = await storageModule.inspectStorageHealth(); + expect(summary.state).toBe("recoverable"); + expect(summary.recoverySource).toBe("wal"); + expect(existsSync(storagePath)).toBe(false); + expect(logWarn).not.toHaveBeenCalledWith( + "Recovered account storage from WAL journal", + expect.anything(), + ); + expect(logInfo).not.toHaveBeenCalled(); + } finally { + storageModule.setStoragePathDirect(null); + await fs.rm(workDir, { recursive: true, force: true }); + } + }); + + it("reports healthy storage when primary storage is valid", async () => { + await withStorageModule("storage-health-healthy", async ({ storagePath, storageModule }) => { + await fs.writeFile( + storagePath, + JSON.stringify({ + version: 3, + activeIndex: 0, + activeIndexByFamily: { codex: 0 }, + accounts: [{ refreshToken: "r", addedAt: 1, lastUsed: 1 }], + }), + "utf-8", + ); + const summary = await storageModule.inspectStorageHealth(); + expect(summary.state).toBe("healthy"); + }); + }); + + it("reports empty storage when primary storage is valid but has no accounts", async () => { + await withStorageModule("storage-health-empty", async ({ storagePath, storageModule }) => { + await fs.writeFile( + storagePath, + JSON.stringify({ version: 3, activeIndex: 0, activeIndexByFamily: { codex: 0 }, accounts: [] }), + "utf-8", + ); + const summary = await storageModule.inspectStorageHealth(); + expect(summary.state).toBe("empty"); + }); + }); + + it("reports intentional-reset when the reset marker exists", async () => { + await 
withStorageModule("storage-health-reset", async ({ resetMarkerPath, storageModule }) => { + await fs.writeFile(resetMarkerPath, "", "utf-8"); + const summary = await storageModule.inspectStorageHealth(); + expect(summary.state).toBe("intentional-reset"); + }); + }); + + it("reports corrupt storage when primary storage is malformed and WAL is unavailable", async () => { + await withStorageModule("storage-health-corrupt-json", async ({ storagePath, storageModule }) => { + await fs.writeFile(storagePath, "{ malformed-json", "utf-8"); + const summary = await storageModule.inspectStorageHealth(); + expect(summary.state).toBe("corrupt"); + }); + }); + + it("reports recoverable storage when invalid primary storage has a valid WAL", async () => { + await withStorageModule("storage-health-recoverable-invalid", async ({ storagePath, walPath, storageModule }) => { + await fs.writeFile(storagePath, "{ malformed-json", "utf-8"); + const content = JSON.stringify({ + version: 3, + activeIndex: 0, + activeIndexByFamily: { codex: 0 }, + accounts: [{ refreshToken: "refresh-token", addedAt: 1, lastUsed: 1 }], + }); + await fs.writeFile(walPath, JSON.stringify({ version: 1, content, checksum: sha256(content) }), "utf-8"); + const summary = await storageModule.inspectStorageHealth(); + expect(summary.state).toBe("recoverable"); + expect(summary.recoverySource).toBe("wal"); + }); + }); +}); diff --git a/test/storage.test.ts b/test/storage.test.ts index a94ca84d..763b5e09 100644 --- a/test/storage.test.ts +++ b/test/storage.test.ts @@ -2961,14 +2961,44 @@ describe("storage", () => { await fs.rm(testWorkDir, { recursive: true, force: true }); }); - it("logs but does not throw on non-ENOENT errors", async () => { - const readOnlyDir = join(testWorkDir, "readonly"); - await fs.mkdir(readOnlyDir, { recursive: true }); - const readOnlyFile = join(readOnlyDir, "accounts.json"); - await fs.writeFile(readOnlyFile, "{}"); - setStoragePathDirect(readOnlyFile); + it("throws and does not write reset 
marker when the primary storage file cannot be removed", async () => { + await fs.writeFile(testStoragePath, "{}", "utf-8"); + const resetMarkerPath = getIntentionalResetMarkerPath(testStoragePath); + const unlinkSpy = vi + .spyOn(fs, "unlink") + .mockImplementation(async (targetPath) => { + if (String(targetPath) === testStoragePath) { + const error = Object.assign(new Error("locked"), { code: "EPERM" }); + throw error; + } + return Promise.resolve(); + }); - await expect(clearAccounts()).resolves.not.toThrow(); + await expect(clearAccounts()).rejects.toMatchObject({ code: "EPERM" }); + expect(existsSync(testStoragePath)).toBe(true); + expect(existsSync(resetMarkerPath)).toBe(false); + + unlinkSpy.mockRestore(); + }); + + it("throws and does not write reset marker when the wal file cannot be removed", async () => { + await fs.writeFile(testStoragePath, "{}", "utf-8"); + await fs.writeFile(`${testStoragePath}.wal`, "{}", "utf-8"); + const resetMarkerPath = getIntentionalResetMarkerPath(testStoragePath); + const unlinkSpy = vi.spyOn(fs, "unlink").mockImplementation(async (targetPath) => { + if (String(targetPath) === `${testStoragePath}.wal`) { + const error = Object.assign(new Error("locked"), { code: "EBUSY" }); + throw error; + } + return Promise.resolve(); + }); + + try { + await expect(clearAccounts()).rejects.toMatchObject({ code: "EBUSY" }); + expect(existsSync(resetMarkerPath)).toBe(false); + } finally { + unlinkSpy.mockRestore(); + } }); }); @@ -4122,7 +4152,7 @@ describe("storage", () => { Object.assign(new Error("EACCES error"), { code: "EACCES" }), ); - await clearAccounts(); + await expect(clearAccounts()).rejects.toMatchObject({ code: "EACCES" }); expect(unlinkSpy).toHaveBeenCalled(); unlinkSpy.mockRestore(); diff --git a/test/stream-failover.test.ts b/test/stream-failover.test.ts index 52ec7eeb..cc64db89 100644 --- a/test/stream-failover.test.ts +++ b/test/stream-failover.test.ts @@ -18,6 +18,21 @@ function makeStallingResponse(): Response { ); } 
+function makeIdleResponse(): Response { + return new Response( + new ReadableStream({ + start() { + // Intentionally idle until timeout. + }, + }), + { + headers: { + "content-type": "text/event-stream", + }, + }, + ); +} + function makeSseResponse(payload: string): Response { return new Response( new ReadableStream({ @@ -52,7 +67,7 @@ describe("stream failover", () => { it("switches to fallback stream when primary stalls", async () => { vi.useFakeTimers(); const fallback = vi.fn(async () => makeSseResponse("data: second\n\n")); - const response = withStreamingFailover(makeStallingResponse(), fallback, { + const response = withStreamingFailover(makeIdleResponse(), fallback, { maxFailovers: 1, stallTimeoutMs: 10, }); @@ -60,7 +75,6 @@ describe("stream failover", () => { const textPromise = response.text(); await vi.advanceTimersByTimeAsync(1_200); const text = await textPromise; - expect(text).toContain("data: first"); expect(text).toContain("codex-multi-auth failover 1"); expect(text).toContain("data: second"); expect(fallback).toHaveBeenCalledTimes(1); @@ -69,7 +83,7 @@ describe("stream failover", () => { it("includes request id marker when provided", async () => { vi.useFakeTimers(); const response = withStreamingFailover( - makeStallingResponse(), + makeIdleResponse(), async () => makeSseResponse("data: fallback\n\n"), { maxFailovers: 1, @@ -87,7 +101,7 @@ describe("stream failover", () => { it("errors when fallback is unavailable", async () => { vi.useFakeTimers(); const response = withStreamingFailover( - makeStallingResponse(), + makeIdleResponse(), async () => null, { maxFailovers: 1, stallTimeoutMs: 10 }, ); @@ -101,7 +115,7 @@ describe("stream failover", () => { it("propagates fallback provider exceptions deterministically", async () => { vi.useFakeTimers(); const response = withStreamingFailover( - makeStallingResponse(), + makeIdleResponse(), async () => { throw new Error("fallback exploded"); }, @@ -114,7 +128,7 @@ describe("stream failover", () => { 
await assertion; }); - it("calls fallback exactly once when read-error and timeout race", async () => { + it("does not trigger fallback when read-error and timeout race after bytes emitted", async () => { vi.useFakeTimers(); const raceResponse = new Response( new ReadableStream({ @@ -139,13 +153,27 @@ describe("stream failover", () => { }); const textPromise = response.text(); + const assertion = expect(textPromise).rejects.toThrow("primary read failure"); await vi.advanceTimersByTimeAsync(1_200); - const text = await textPromise; + await assertion; - expect(fallback).toHaveBeenCalledTimes(1); - expect((text.match(/codex-multi-auth failover 1/g) ?? []).length).toBe(1); - expect(text).toContain("data: first"); - expect(text).toContain("data: fallback"); + expect(fallback).not.toHaveBeenCalled(); + }); + + it("does not replay after bytes have already been emitted", async () => { + vi.useFakeTimers(); + const fallback = vi.fn(async () => makeSseResponse("data: fallback\n\n")); + const response = withStreamingFailover(makeStallingResponse(), fallback, { + maxFailovers: 1, + stallTimeoutMs: 10, + }); + + const textPromise = response.text(); + const assertion = expect(textPromise).rejects.toThrow("SSE stream stalled"); + await vi.advanceTimersByTimeAsync(1_200); + await assertion; + + expect(fallback).not.toHaveBeenCalled(); }); it("releases underlying reader when wrapped stream is cancelled", async () => {