diff --git a/lib/request/rate-limit-backoff.ts b/lib/request/rate-limit-backoff.ts index 9b528b6e..03b82f6a 100644 --- a/lib/request/rate-limit-backoff.ts +++ b/lib/request/rate-limit-backoff.ts @@ -17,6 +17,7 @@ export interface RateLimitBackoffResult { const RATE_LIMIT_DEDUP_WINDOW_MS = 2000; const RATE_LIMIT_STATE_RESET_MS = 120_000; const MAX_BACKOFF_MS = 60_000; +const RATE_LIMIT_BACKOFF_JITTER_FACTOR = 0.2; export const RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS = 5000; @@ -24,6 +25,7 @@ interface RateLimitState { consecutive429: number; lastAt: number; quotaKey: string; + lastDelayMs: number; } const rateLimitStateByAccountQuota = new Map(); @@ -33,6 +35,11 @@ function normalizeDelayMs(value: number | null | undefined, fallback: number): n return Math.max(0, Math.floor(candidate)); } +function addBackoffJitter(baseMs: number): number { + const jitter = baseMs * RATE_LIMIT_BACKOFF_JITTER_FACTOR * (Math.random() * 2 - 1); + return Math.max(0, Math.floor(baseMs + jitter)); +} + function pruneStaleRateLimitState(): void { const now = Date.now(); for (const [key, state] of rateLimitStateByAccountQuota) { @@ -58,13 +65,9 @@ export function getRateLimitBackoff( const baseDelay = normalizeDelayMs(serverRetryAfterMs, 1000); if (previous && now - previous.lastAt < RATE_LIMIT_DEDUP_WINDOW_MS) { - const backoffDelay = Math.min( - baseDelay * Math.pow(2, previous.consecutive429 - 1), - MAX_BACKOFF_MS, - ); return { attempt: previous.consecutive429, - delayMs: Math.max(baseDelay, backoffDelay), + delayMs: previous.lastDelayMs, isDuplicate: true, }; } @@ -74,16 +77,18 @@ export function getRateLimitBackoff( ? previous.consecutive429 + 1 : 1; + const backoffDelay = Math.min(baseDelay * Math.pow(2, attempt - 1), MAX_BACKOFF_MS); + const jitteredDelay = Math.min(addBackoffJitter(backoffDelay), MAX_BACKOFF_MS); + const delayMs = Math.max(baseDelay, jitteredDelay); rateLimitStateByAccountQuota.set(stateKey, { consecutive429: attempt, lastAt: now, quotaKey, + lastDelayMs: delayMs, }); - - const backoffDelay = Math.min(baseDelay * Math.pow(2, attempt - 1), MAX_BACKOFF_MS); return { attempt, - delayMs: Math.max(baseDelay, backoffDelay), + delayMs, isDuplicate: false, }; } diff --git a/test/rate-limit-backoff.test.ts b/test/rate-limit-backoff.test.ts index 107772f8..cba02f85 100644 --- a/test/rate-limit-backoff.test.ts +++ b/test/rate-limit-backoff.test.ts @@ -12,10 +12,12 @@ describe("Rate limit backoff", () => { vi.useFakeTimers(); vi.setSystemTime(new Date(0)); clearRateLimitBackoffState(); + vi.spyOn(Math, "random").mockReturnValue(0.5); }); afterEach(() => { clearRateLimitBackoffState(); + vi.restoreAllMocks(); vi.useRealTimers(); }); @@ -39,6 +41,24 @@ describe("Rate limit backoff", () => { expect(second.isDuplicate).toBe(false); }); + it("applies jitter to new backoff windows but keeps duplicate retries stable", () => { + vi.mocked(Math.random).mockReturnValueOnce(1); + const first = getRateLimitBackoff(4, "jitter-test", 1000); + expect(first.delayMs).toBe(1200); + + vi.setSystemTime(new Date(1000)); + vi.mocked(Math.random).mockReturnValueOnce(0); + const duplicate = getRateLimitBackoff(4, "jitter-test", 1000); + expect(duplicate.delayMs).toBe(1200); + expect(duplicate.isDuplicate).toBe(true); + + vi.setSystemTime(new Date(2500)); + vi.mocked(Math.random).mockReturnValueOnce(0); + const second = getRateLimitBackoff(4, "jitter-test", 1000); + expect(second.delayMs).toBe(1600); + expect(second.isDuplicate).toBe(false); + }); + it("resets after quiet period", () => { getRateLimitBackoff(0, "codex", 1000); vi.setSystemTime(new Date(121_000));