oarisur · oarisur · May 27, 2026 · May 27, 2026 · May 27, 2026 · May 27, 2026
diff --git a/README.md b/README.md
@@ -226,11 +226,11 @@ If `auto-patch` is `false`, you only need `pull-requests: write`.
 
 ## Cost Estimate
 
-Each PR run makes approximately **N × 3** LLM calls, where N is the number of changed code files (up to `max-files-per-run`). Each call is a short prompt (~1,500 tokens) + a short completion (~400 tokens).
+Each PR run makes approximately **N × 6** LLM calls, where N is the number of changed code files (up to `max-files-per-run`). Each call is a short prompt (~1,500 tokens) + a short completion (~500 tokens).
 
 For a typical PR changing 5 files:
-- ~15 calls × ~1,900 tokens ≈ ~28,500 tokens
-- **Cost at gpt-4o pricing: ~$0.10 per PR run**
+- ~30 calls × ~2,000 tokens ≈ ~60,000 tokens
+- **Cost at gpt-4o pricing: ~$0.20 per PR run**
 
 Set `max-files-per-run: 10` and `sensitivity: low` to minimise cost on large PRs.
 

diff --git a/dist/index.js b/dist/index.js
@@ -111991,15 +111991,15 @@ class LLMClient {
                 { role: "user", content: userPrompt },
             ],
             temperature: 0.1,
-            max_tokens: 1024,
+            max_tokens: 2048,
             response_format: { type: "json_object" },
         }, { timeout: 60000 });
         return response.choices[0]?.message?.content ?? "{}";
     }
     async callAnthropic(userPrompt) {
         const response = await this.anthropicClient.messages.create({
             model: this.model,
-            max_tokens: 1024,
+            max_tokens: 2048,
             temperature: 0.1,
             system: SYSTEM_PROMPT,
             messages: [
@@ -112026,7 +112026,7 @@ class LLMClient {
                 config: {
                     systemInstruction: SYSTEM_PROMPT,
                     temperature: 0.1,
-                    maxOutputTokens: 1024,
+                    maxOutputTokens: 2048,
                     responseMimeType: "application/json",
                     abortSignal: controller.signal,
                 },
@@ -112278,6 +112278,12 @@ function parseDocFile(filePath, rawContent) {
  * but we add a guard here to skip candidates whose section content is empty.
  */
 const MAX_SECTION_CHARS = 15000;
+/**
+ * Minimum delay (ms) between consecutive LLM calls, used to ease rate-limit pressure.
+ * Gemini free tier allows 5 RPM (~12s between calls), so 1.5s does not guarantee
+ * compliance there; it keeps paid tiers fast while reducing 429 storms on free tiers.
+ */
+const RATE_LIMIT_DELAY_MS = 1500;
 // ─── Sensitivity → Confidence Threshold ──────────────────────────────────────
 const CONFIDENCE_ORDER = ["definite", "likely", "possible"];
 function meetsThreshold(confidence, sensitivity) {
@@ -112335,6 +112341,10 @@ class DriftDetector {
                     continue;
                 }
                 core_debug(`  Checking against ${candidate.matchedSection.filePath}#${candidate.matchedSection.heading} (score: ${candidate.relevanceScore.toFixed(2)})`);
+                // Rate-limit: pause between LLM calls to stay within API quotas
+                if (totalCandidates > 1) {
+                    await new Promise((res) => setTimeout(res, RATE_LIMIT_DELAY_MS));
+                }
                 let llmResult;
                 try {
                     llmResult = await this.llm.detectDrift(changedFile.filePath, changedFile.patch, candidate.matchedSection.filePath, candidate.matchedSection.heading, candidate.matchedSection.content, this.sensitivity);

diff --git a/dist/index.js.map b/dist/index.js.map
diff --git a/src/drift-detector.ts b/src/drift-detector.ts
@@ -18,6 +18,13 @@ import { parseDocFile, buildDocIndex, findCandidateSections } from "./doc-extrac
  */
 const MAX_SECTION_CHARS = 15_000;
 
+/**
+ * Minimum delay (ms) between consecutive LLM calls, used to ease rate-limit pressure.
+ * Gemini free tier allows 5 RPM (~12s between calls), so 1.5s does not guarantee
+ * compliance there; it keeps paid tiers fast while reducing 429 storms on free tiers.
+ */
+const RATE_LIMIT_DELAY_MS = 1_500;
+
 // ─── Sensitivity → Confidence Threshold ──────────────────────────────────────
 
 const CONFIDENCE_ORDER = ["definite", "likely", "possible"] as const;
@@ -107,6 +114,11 @@ export class DriftDetector {
           `  Checking against ${candidate.matchedSection.filePath}#${candidate.matchedSection.heading} (score: ${candidate.relevanceScore.toFixed(2)})`
         );
 
+        // Rate-limit: pause between LLM calls to stay within API quotas
+        if (totalCandidates > 1) {
+          await new Promise((res) => setTimeout(res, RATE_LIMIT_DELAY_MS));
+        }
+
         let llmResult;
         try {
           llmResult = await this.llm.detectDrift(

diff --git a/src/llm-client.ts b/src/llm-client.ts
@@ -214,7 +214,7 @@ export class LLMClient {
         { role: "user", content: userPrompt },
       ],
       temperature: 0.1,
-      max_tokens: 1024,
+      max_tokens: 2048,
       response_format: { type: "json_object" },
     }, { timeout: 60_000 });
 
@@ -224,7 +224,7 @@ export class LLMClient {
   private async callAnthropic(userPrompt: string): Promise<string> {
     const response = await this.anthropicClient!.messages.create({
       model: this.model,
-      max_tokens: 1024,
+      max_tokens: 2048,
       temperature: 0.1,
       system: SYSTEM_PROMPT,
       messages: [
@@ -252,7 +252,7 @@ export class LLMClient {
         config: {
           systemInstruction: SYSTEM_PROMPT,
           temperature: 0.1,
-          maxOutputTokens: 1024,
+          maxOutputTokens: 2048,
           responseMimeType: "application/json",
           abortSignal: controller.signal,
         },