oarisur · oarisur · May 27, 2026 · May 27, 2026
diff --git a/__tests__/diff-parser.test.ts b/__tests__/diff-parser.test.ts
@@ -1,9 +1,10 @@
-import { parsePRFiles, isCodeFile } from "../src/diff-parser";
+import { parsePRFiles, isCodeFile, extractStringLiterals } from "../src/diff-parser";
 import {
   REDUX_TO_ZUSTAND_PATCH,
   API_ROUTE_PATCH,
   DB_SWITCH_PATCH,
   UNRELATED_CSS_PATCH,
+  MODEL_NAME_CHANGE_PATCH,
   makePRFile,
 } from "./fixtures/diffs";
 
@@ -113,4 +114,62 @@ describe("parsePRFiles", () => {
     expect(skippedFiles).toHaveLength(1);
     expect(skippedFiles[0]).toContain("no patch data");
   });
+
+  it("extracts changedLiterals from a model name change diff", () => {
+    const files = [makePRFile("src/llm-client.ts", MODEL_NAME_CHANGE_PATCH)];
+    const { changedFiles } = parsePRFiles(files, DEFAULT_EXTENSIONS, 20);
+
+    expect(changedFiles).toHaveLength(1);
+    const file = changedFiles[0];
+    // Literals are taken from the changed (+/-) lines only, so both the old and the new value are expected
+    expect(file.changedLiterals).toContain("gpt-4o"); // value on the removed (-) fixture line
+    expect(file.changedLiterals).toContain("gpt-4o-mini"); // value on the added (+) fixture line
+  });
+
+  it("extracts API URL literals from route change diff", () => {
+    const files = [makePRFile("src/routes/users.ts", API_ROUTE_PATCH)];
+    const { changedFiles } = parsePRFiles(files, DEFAULT_EXTENSIONS, 20);
+
+    const file = changedFiles[0]; // NOTE(review): the number of changed files is not asserted in this test
+    expect(file.changedLiterals).toContain("/api/v2/users"); // path literals qualify: the pattern accepts a leading slash
+    expect(file.changedLiterals).toContain("/api/v1/users");
+  });
+});
+
+describe("extractStringLiterals", () => { // exercises the extractor directly on raw lines, without patch parsing
+  it("extracts quoted string values from code lines", () => {
+    const lines = [
+      '  openai: "gpt-4o-mini",',
+      '  anthropic: "claude-3-5-sonnet-20241022",',
+    ];
+    const result = extractStringLiterals(lines);
+    expect(result).toContain("gpt-4o-mini");
+    expect(result).toContain("claude-3-5-sonnet-20241022");
+  });
+
+  it("filters out stopword literals", () => {
+    const lines = [
+      '"use strict";', // contains a space, so the literal pattern never captures it in the first place
+      'const encoding = "utf-8";', // captured by the pattern, then dropped by the stopword set
+      'const method = "GET";', // captured by the pattern, then dropped by the stopword set
+      'const model = "gpt-4o";', // not a stopword: must survive filtering
+    ];
+    const result = extractStringLiterals(lines);
+    expect(result).not.toContain("use strict");
+    expect(result).not.toContain("utf-8");
+    expect(result).not.toContain("GET");
+    expect(result).toContain("gpt-4o");
+  });
+
+  it("handles single-quoted strings", () => {
+    const lines = ["import { create } from 'zustand';"]; // a module specifier is an ordinary quoted literal
+    const result = extractStringLiterals(lines);
+    expect(result).toContain("zustand");
+  });
+
+  it("returns empty array for lines with no literals", () => {
+    const lines = ["const x = 42;", "if (y > 10) {"]; // neither line contains a quote character
+    const result = extractStringLiterals(lines);
+    expect(result).toHaveLength(0);
+  });
 });
diff --git a/__tests__/doc-extractor.test.ts b/__tests__/doc-extractor.test.ts
@@ -1,7 +1,7 @@
 import { parseDocFile, buildDocIndex, findCandidateSections } from "../src/doc-extractor";
 import type { ChangedFile } from "../src/types";
-import { README_WITH_REDUX, ARCHITECTURE_WITH_V1_API, UNRELATED_CHANGELOG } from "./fixtures/docs";
-import { REDUX_TO_ZUSTAND_PATCH, API_ROUTE_PATCH, DB_SWITCH_PATCH } from "./fixtures/diffs";
+import { README_WITH_REDUX, ARCHITECTURE_WITH_V1_API, UNRELATED_CHANGELOG, README_WITH_CONFIG_TABLE } from "./fixtures/docs";
+import { REDUX_TO_ZUSTAND_PATCH, API_ROUTE_PATCH, DB_SWITCH_PATCH, MODEL_NAME_CHANGE_PATCH } from "./fixtures/diffs";
 
 // Suppress @actions/core logging during tests
 jest.mock("@actions/core", () => ({
@@ -15,7 +15,8 @@ jest.mock("@actions/core", () => ({
 function makeChangedFile(
   filePath: string,
   patch: string,
-  changedSymbols: string[]
+  changedSymbols: string[],
+  changedLiterals: string[] = []
 ): ChangedFile {
   const additions = patch
     .split("\n")
@@ -32,6 +33,7 @@ function makeChangedFile(
     additions,
     deletions,
     changedSymbols,
+    changedLiterals,
     tokenEstimate: Math.ceil(patch.length / 4),
   };
 }
@@ -178,4 +180,40 @@ describe("findCandidateSections", () => {
     const candidates = findCandidateSections(dbChange, index, 1);
     expect(candidates.length).toBeLessThanOrEqual(1);
   });
+
+  it("matches model name change to Configuration section via string literals", () => {
+    const docs = [
+      parseDocFile("README.md", README_WITH_CONFIG_TABLE),
+    ];
+    const index = buildDocIndex(docs);
+
+    const modelChange = makeChangedFile(
+      "src/llm-client.ts",
+      MODEL_NAME_CHANGE_PATCH,
+      ["DEFAULT_MODELS"],
+      ["gpt-4o", "gpt-4o-mini", "claude-3-5-sonnet-20241022", "gemini-2.5-flash"] // supplied by hand, not derived from the patch
+    );
+
+    const candidates = findCandidateSections(modelChange, index, 5);
+    expect(candidates.length).toBeGreaterThan(0);
+
+    // The Configuration section should be among the returned candidates (topN = 5) with a positive
+    // score, because it mentions the literals "gpt-4o", "claude-3-5-sonnet-20241022", etc.
+    const configCandidate = candidates.find(
+      (c) => c.matchedSection.heading === "Configuration"
+    );
+    expect(configCandidate).toBeDefined();
+    expect(configCandidate!.relevanceScore).toBeGreaterThan(0);
+  });
+
+  it("indexes quoted string values from doc content as keywords", () => {
+    const doc = parseDocFile("README.md", README_WITH_CONFIG_TABLE);
+    const configSection = doc.sections.find((s) => s.heading === "Configuration");
+    expect(configSection).toBeDefined();
+
+    // All three default model names appear in the fixture's config table as backtick-quoted inline code
+    expect(configSection!.keywords).toContain("gpt-4o");
+    expect(configSection!.keywords).toContain("claude-3-5-sonnet-20241022");
+    expect(configSection!.keywords).toContain("gemini-2.5-flash");
+  });
 });
diff --git a/__tests__/doc-patcher.test.ts b/__tests__/doc-patcher.test.ts
@@ -34,6 +34,7 @@ function makeChangedFile(overrides?: Partial<ChangedFile>): ChangedFile {
     additions: ["+import zustand"],
     deletions: ["-import redux"],
     changedSymbols: ["useCartStore"],
+    changedLiterals: [],
     tokenEstimate: 50,
     ...overrides,
   };

diff --git a/__tests__/drift-detector.test.ts b/__tests__/drift-detector.test.ts
@@ -36,6 +36,7 @@ function makeCodeFiles() {
         .filter((l) => l.startsWith("-") && !l.startsWith("---"))
         .map((l) => l.slice(1)),
       changedSymbols: ["useCartStore", "cartSlice", "createSlice"],
+      changedLiterals: [],
       tokenEstimate: Math.ceil(REDUX_TO_ZUSTAND_PATCH.length / 4),
     },
   ];

diff --git a/__tests__/fixtures/diffs.ts b/__tests__/fixtures/diffs.ts
@@ -71,6 +71,17 @@ export const UNRELATED_CSS_PATCH = `
  }
 `.trim();
 
+// Fixture: diff that changes the default OpenAI model from gpt-4o to gpt-4o-mini; the anthropic and gemini entries are unchanged context lines
+export const MODEL_NAME_CHANGE_PATCH = `
+@@ -55,7 +55,7 @@
+ const DEFAULT_MODELS: Record<LLMProvider, string> = {
+-  openai: "gpt-4o",
++  openai: "gpt-4o-mini",
+   anthropic: "claude-3-5-sonnet-20241022",
+   gemini: "gemini-2.5-flash",
+ };
+`.trim();
+
 // Fixture: GitHub PR file list entries
 export function makePRFile(
   filename: string,

diff --git a/__tests__/fixtures/docs.ts b/__tests__/fixtures/docs.ts
@@ -66,3 +66,22 @@ export const UNRELATED_CHANGELOG = `
 
 - Initial release
 `.trim();
+
+// Fixture: README whose Configuration section is a table listing default model names and option values in backtick-quoted inline code
+export const README_WITH_CONFIG_TABLE = `
+# Knowledge Diff
+
+A GitHub Action that detects documentation drift.
+
+## Configuration
+
+| Input | Default | Description |
+|---|---|---|
+| \`llm-provider\` | \`openai\` | LLM backend: \`openai\`, \`anthropic\`, or \`gemini\`. |
+| \`llm-model\` | \`gpt-4o\` / \`claude-3-5-sonnet-20241022\` / \`gemini-2.5-flash\` | Override the model. |
+| \`sensitivity\` | \`medium\` | Drift threshold: \`low\`, \`medium\`, \`high\`. |
+
+## How It Works
+
+The action parses the PR diff and matches code changes against documentation sections.
+`.trim();
diff --git a/__tests__/pr-commenter.test.ts b/__tests__/pr-commenter.test.ts
@@ -38,6 +38,7 @@ function makeChangedFile(overrides?: Partial<ChangedFile>): ChangedFile {
     additions: ["+import zustand"],
     deletions: ["-import redux"],
     changedSymbols: ["useCartStore"],
+    changedLiterals: [],
     tokenEstimate: 50,
     ...overrides,
   };

diff --git a/dist/index.js b/dist/index.js
@@ -79409,6 +79409,42 @@ const JS_KEYWORDS = new Set([
     "from", "import", "export", "return", "const", "class", "async",
     "await", "true", "false", "null", "undefined", "this", "super",
 ]);
+// ─── String Literal Extraction ────────────────────────────────────────────────
+/**
+ * Matches a single- or double-quoted value in changed lines, e.g. "gpt-4o-mini", 'openai', "/api/v2/users".
+ * The captured value (group 1) is at least 3 characters long, starts with an alphanumeric character or a slash,
+ * and contains no whitespace. The opening and closing quote characters are not required to be the same.
+ */
+const STRING_LITERAL_RE = /["']([a-zA-Z0-9/][a-zA-Z0-9_./@:-]{2,})["']/g;
+/** Common non-architectural strings to ignore during literal extraction. Lookup is an exact, case-sensitive Set membership test, hence the upper- and lower-case variants. */
+const LITERAL_STOPWORDS = new Set([
+    "use strict", "utf-8", "utf8", "ascii", "base64", "hex", // NOTE(review): the first entry contains a space, which STRING_LITERAL_RE never captures, so it is effectively redundant
+    "GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS",
+    "get", "post", "put", "delete", "patch", "head", "options",
+    "text/plain", "text/html", "application/json",
+    "Content-Type", "content-type", "Authorization", "authorization",
+    "string", "number", "boolean", "object", "function",
+    "node_modules", "package.json", "tsconfig.json",
+    "click", "submit", "change", "input", "keydown", "keyup",
+    "div", "span", "button", "form", "table",
+]);
+/**
+ * Collects the distinct quoted values found in `lines` (per STRING_LITERAL_RE), skipping any value listed in LITERAL_STOPWORDS.
+ * Returns them as an array in first-occurrence order with their original casing; the result is empty when nothing matches.
+ */
+function extractStringLiterals(lines) {
+    const literals = new Set(); // de-duplicates values that occur more than once
+    const text = lines.join("\n"); // single scan over all lines; the pattern admits no newline, so a match cannot span lines
+    STRING_LITERAL_RE.lastIndex = 0; // the regex is global (stateful) and shared, so always restart from the beginning
+    let match;
+    while ((match = STRING_LITERAL_RE.exec(text)) !== null) {
+        const value = match[1]; // capture group 1: the text between the quote characters
+        if (!LITERAL_STOPWORDS.has(value)) {
+            literals.add(value);
+        }
+    }
+    return Array.from(literals);
+}
 // ─── File Extension Check ─────────────────────────────────────────────────────
 function isCodeFile(filePath, allowedExtensions) {
     const ext = filePath.split(".").pop()?.toLowerCase() ?? "";
@@ -79447,13 +79483,16 @@ function parsePRFiles(files, allowedExtensions, maxFiles) {
         }
         const { additions, deletions } = parsePatchLines(file.patch);
         // Only extract symbols from *changed* lines (not context lines)
-        const changedSymbols = extractSymbols([...additions, ...deletions]);
+        const changedLines = [...additions, ...deletions];
+        const changedSymbols = extractSymbols(changedLines);
+        const changedLiterals = extractStringLiterals(changedLines);
         changedFiles.push({
             filePath: file.filename,
             patch: file.patch,
             additions,
             deletions,
             changedSymbols,
+            changedLiterals,
             tokenEstimate: estimateTokens(file.patch),
         });
         processed++;
@@ -112037,6 +112076,8 @@ class LLMClient {
 // ─── Shared Constants ─────────────────────────────────────────────────────────
 /** Technology keywords that signal architecture intent. Shared across keyword extraction and candidate matching. */
 const TECH_KEYWORD_RE = /\b(redux|zustand|mobx|recoil|jotai|react|vue|angular|express|fastapi|django|rails|postgres|mysql|mongodb|graphql|rest|grpc|websocket|kafka|rabbitmq|redis|docker|kubernetes|aws|gcp|azure)\b/gi;
+/** Matches values wrapped in double quotes, single quotes, or backticks in documentation content (model names, config values, etc.): at least 3 characters, no whitespace, first character alphanumeric. NOTE(review): unlike STRING_LITERAL_RE, a leading slash is not accepted here, so path-like values such as /api/v2/users are not captured by this pattern; confirm this is intended. */
+const DOC_STRING_LITERAL_RE = /["'`]([a-zA-Z0-9][a-zA-Z0-9_./@:-]{2,})["'`]/g;
 // ─── Markdown Section Splitting ───────────────────────────────────────────────
 const HEADING_RE = /^(#{1,6})\s+(.+)$/;
 /**
@@ -112135,6 +112176,11 @@ function extractKeywords(heading, content) {
     for (const m of content.matchAll(TECH_KEYWORD_RE)) {
         kw.add(m[1].toLowerCase());
     }
+    // Quoted string values in documentation (model names, config values, URLs, etc.)
+    // These are critical for matching diffs that change string literal values.
+    for (const m of content.matchAll(DOC_STRING_LITERAL_RE)) {
+        kw.add(m[1].toLowerCase());
+    }
     return Array.from(kw);
 }
 function buildIndex(docFiles) {
@@ -112182,6 +112228,12 @@ function findCandidateSections(changedFile, index, topN = 3) {
     for (const m of changeText.matchAll(TECH_KEYWORD_RE)) {
         queryTerms.add(m[1].toLowerCase());
     }
+    // String literal values from the diff (model names, config values, URLs, etc.)
+    if (changedFile.changedLiterals) {
+        for (const lit of changedFile.changedLiterals) {
+            queryTerms.add(lit.toLowerCase());
+        }
+    }
     // Score sections by how many query terms they match
     for (const term of queryTerms) {
         const sections = index.get(term) ?? [];
@@ -112270,7 +112322,7 @@ class DriftDetector {
         let totalCandidates = 0;
         for (const changedFile of changedFiles) {
             info(`Analysing: ${changedFile.filePath}`);
-            const candidates = findCandidateSections(changedFile, docIndex, 3);
+            const candidates = findCandidateSections(changedFile, docIndex, 6);
             totalCandidates += candidates.length;
             if (candidates.length === 0) {
                 core_debug(`  No candidate doc sections found for ${changedFile.filePath}`);

diff --git a/dist/index.js.map b/dist/index.js.map
diff --git a/src/diff-parser.ts b/src/diff-parser.ts
@@ -68,6 +68,48 @@ const JS_KEYWORDS = new Set([
   "await", "true", "false", "null", "undefined", "this", "super",
 ]);
 
+// ─── String Literal Extraction ────────────────────────────────────────────────
+
+/**
+ * Matches a single- or double-quoted value in changed lines, e.g. "gpt-4o-mini", 'openai', "/api/v2/users".
+ * The captured value (group 1) is at least 3 characters long, starts with an alphanumeric character or a slash,
+ * and contains no whitespace. The opening and closing quote characters are not required to be the same.
+ */
+const STRING_LITERAL_RE = /["']([a-zA-Z0-9/][a-zA-Z0-9_./@:-]{2,})["']/g;
+
+/** Common non-architectural strings to ignore during literal extraction. Lookup is an exact, case-sensitive Set membership test, hence the upper- and lower-case variants. */
+const LITERAL_STOPWORDS = new Set([
+  "use strict", "utf-8", "utf8", "ascii", "base64", "hex", // NOTE(review): the first entry contains a space, which STRING_LITERAL_RE never captures, so it is effectively redundant
+  "GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS",
+  "get", "post", "put", "delete", "patch", "head", "options",
+  "text/plain", "text/html", "application/json",
+  "Content-Type", "content-type", "Authorization", "authorization",
+  "string", "number", "boolean", "object", "function",
+  "node_modules", "package.json", "tsconfig.json",
+  "click", "submit", "change", "input", "keydown", "keyup",
+  "div", "span", "button", "form", "table",
+]);
+
+/**
+ * Collects the distinct quoted values found in `lines` (per STRING_LITERAL_RE), skipping any value listed in LITERAL_STOPWORDS.
+ * Returns them as an array in first-occurrence order with their original casing; the result is empty when nothing matches.
+ */
+export function extractStringLiterals(lines: string[]): string[] {
+  const literals = new Set<string>(); // de-duplicates values that occur more than once
+  const text = lines.join("\n"); // single scan over all lines; the pattern admits no newline, so a match cannot span lines
+
+  STRING_LITERAL_RE.lastIndex = 0; // the regex is global (stateful) and shared, so always restart from the beginning
+  let match: RegExpExecArray | null;
+  while ((match = STRING_LITERAL_RE.exec(text)) !== null) {
+    const value = match[1]; // capture group 1: the text between the quote characters
+    if (!LITERAL_STOPWORDS.has(value)) {
+      literals.add(value);
+    }
+  }
+
+  return Array.from(literals);
+}
+
 // ─── File Extension Check ─────────────────────────────────────────────────────
 
 export function isCodeFile(
@@ -125,14 +167,17 @@ export function parsePRFiles(
     const { additions, deletions } = parsePatchLines(file.patch);
 
     // Only extract symbols from *changed* lines (not context lines)
-    const changedSymbols = extractSymbols([...additions, ...deletions]);
+    const changedLines = [...additions, ...deletions];
+    const changedSymbols = extractSymbols(changedLines);
+    const changedLiterals = extractStringLiterals(changedLines);
 
     changedFiles.push({
       filePath: file.filename,
       patch: file.patch,
       additions,
       deletions,
       changedSymbols,
+      changedLiterals,
       tokenEstimate: estimateTokens(file.patch),
     });