Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 60 additions & 1 deletion __tests__/diff-parser.test.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import { parsePRFiles, isCodeFile } from "../src/diff-parser";
import { parsePRFiles, isCodeFile, extractStringLiterals } from "../src/diff-parser";
import {
REDUX_TO_ZUSTAND_PATCH,
API_ROUTE_PATCH,
DB_SWITCH_PATCH,
UNRELATED_CSS_PATCH,
MODEL_NAME_CHANGE_PATCH,
makePRFile,
} from "./fixtures/diffs";

Expand Down Expand Up @@ -113,4 +114,62 @@ describe("parsePRFiles", () => {
expect(skippedFiles).toHaveLength(1);
expect(skippedFiles[0]).toContain("no patch data");
});

it("extracts changedLiterals from a model name change diff", () => {
  // A single PR file whose patch swaps the default OpenAI model name.
  const prFiles = [makePRFile("src/llm-client.ts", MODEL_NAME_CHANGE_PATCH)];
  const parsed = parsePRFiles(prFiles, DEFAULT_EXTENSIONS, 20);

  expect(parsed.changedFiles).toHaveLength(1);

  // Literals are harvested from the added/removed lines only; unchanged
  // context lines of the hunk do not contribute.
  const literals = parsed.changedFiles[0].changedLiterals;
  for (const expected of ["gpt-4o", "gpt-4o-mini"]) {
    expect(literals).toContain(expected);
  }
});

it("extracts API URL literals from route change diff", () => {
  const prFiles = [makePRFile("src/routes/users.ts", API_ROUTE_PATCH)];
  const parsed = parsePRFiles(prFiles, DEFAULT_EXTENSIONS, 20);

  // Both route versions are expected to be reported as changed literals.
  const literals = parsed.changedFiles[0].changedLiterals;
  for (const route of ["/api/v2/users", "/api/v1/users"]) {
    expect(literals).toContain(route);
  }
});
});

describe("extractStringLiterals", () => {
  it("extracts quoted string values from code lines", () => {
    // Two config-style lines, each carrying one double-quoted value.
    const literals = extractStringLiterals([
      ' openai: "gpt-4o-mini",',
      ' anthropic: "claude-3-5-sonnet-20241022",',
    ]);

    for (const expected of ["gpt-4o-mini", "claude-3-5-sonnet-20241022"]) {
      expect(literals).toContain(expected);
    }
  });

  it("filters out stopword literals", () => {
    const literals = extractStringLiterals([
      '"use strict";',
      'const encoding = "utf-8";',
      'const method = "GET";',
      'const model = "gpt-4o";',
    ]);

    // Boilerplate values must be dropped...
    for (const ignored of ["use strict", "utf-8", "GET"]) {
      expect(literals).not.toContain(ignored);
    }
    // ...while the meaningful value on the last line is kept.
    expect(literals).toContain("gpt-4o");
  });

  it("handles single-quoted strings", () => {
    const literals = extractStringLiterals(["import { create } from 'zustand';"]);
    expect(literals).toContain("zustand");
  });

  it("returns empty array for lines with no literals", () => {
    // Neither line contains a quoted value.
    const literals = extractStringLiterals(["const x = 42;", "if (y > 10) {"]);
    expect(literals).toHaveLength(0);
  });
});
44 changes: 41 additions & 3 deletions __tests__/doc-extractor.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { parseDocFile, buildDocIndex, findCandidateSections } from "../src/doc-extractor";
import type { ChangedFile } from "../src/types";
import { README_WITH_REDUX, ARCHITECTURE_WITH_V1_API, UNRELATED_CHANGELOG } from "./fixtures/docs";
import { REDUX_TO_ZUSTAND_PATCH, API_ROUTE_PATCH, DB_SWITCH_PATCH } from "./fixtures/diffs";
import { README_WITH_REDUX, ARCHITECTURE_WITH_V1_API, UNRELATED_CHANGELOG, README_WITH_CONFIG_TABLE } from "./fixtures/docs";
import { REDUX_TO_ZUSTAND_PATCH, API_ROUTE_PATCH, DB_SWITCH_PATCH, MODEL_NAME_CHANGE_PATCH } from "./fixtures/diffs";

// Suppress @actions/core logging during tests
jest.mock("@actions/core", () => ({
Expand All @@ -15,7 +15,8 @@ jest.mock("@actions/core", () => ({
function makeChangedFile(
filePath: string,
patch: string,
changedSymbols: string[]
changedSymbols: string[],
changedLiterals: string[] = []
): ChangedFile {
const additions = patch
.split("\n")
Expand All @@ -32,6 +33,7 @@ function makeChangedFile(
additions,
deletions,
changedSymbols,
changedLiterals,
tokenEstimate: Math.ceil(patch.length / 4),
};
}
Expand Down Expand Up @@ -178,4 +180,40 @@ describe("findCandidateSections", () => {
const candidates = findCandidateSections(dbChange, index, 1);
expect(candidates.length).toBeLessThanOrEqual(1);
});

it("matches model name change to Configuration section via string literals", () => {
  // Index a single README whose Configuration table lists the default models.
  const index = buildDocIndex([parseDocFile("README.md", README_WITH_CONFIG_TABLE)]);

  const literals = ["gpt-4o", "gpt-4o-mini", "claude-3-5-sonnet-20241022", "gemini-2.5-flash"];
  const modelChange = makeChangedFile(
    "src/llm-client.ts",
    MODEL_NAME_CHANGE_PATCH,
    ["DEFAULT_MODELS"],
    literals
  );

  const candidates = findCandidateSections(modelChange, index, 5);
  expect(candidates.length).toBeGreaterThan(0);

  // The Configuration section mentions "gpt-4o", "claude-3-5-sonnet-20241022",
  // etc., so it must show up among the candidates with a positive score.
  const configCandidate = candidates.find(
    ({ matchedSection }) => matchedSection.heading === "Configuration"
  );
  expect(configCandidate).toBeDefined();
  expect(configCandidate?.relevanceScore).toBeGreaterThan(0);
});

it("indexes quoted string values from doc content as keywords", () => {
  const { sections } = parseDocFile("README.md", README_WITH_CONFIG_TABLE);
  const configSection = sections.find(({ heading }) => heading === "Configuration");
  expect(configSection).toBeDefined();

  // The model names appear in the config table as backtick-quoted inline
  // code; each of them should have been picked up as a section keyword.
  const keywords = configSection?.keywords;
  for (const model of ["gpt-4o", "claude-3-5-sonnet-20241022", "gemini-2.5-flash"]) {
    expect(keywords).toContain(model);
  }
});
});
1 change: 1 addition & 0 deletions __tests__/doc-patcher.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ function makeChangedFile(overrides?: Partial<ChangedFile>): ChangedFile {
additions: ["+import zustand"],
deletions: ["-import redux"],
changedSymbols: ["useCartStore"],
changedLiterals: [],
tokenEstimate: 50,
...overrides,
};
Expand Down
1 change: 1 addition & 0 deletions __tests__/drift-detector.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ function makeCodeFiles() {
.filter((l) => l.startsWith("-") && !l.startsWith("---"))
.map((l) => l.slice(1)),
changedSymbols: ["useCartStore", "cartSlice", "createSlice"],
changedLiterals: [],
tokenEstimate: Math.ceil(REDUX_TO_ZUSTAND_PATCH.length / 4),
},
];
Expand Down
11 changes: 11 additions & 0 deletions __tests__/fixtures/diffs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,17 @@ export const UNRELATED_CSS_PATCH = `
}
`.trim();

// Fixture: diff that changes a default model name (config value change).
// A single hunk that removes the `openai: "gpt-4o"` entry and adds
// `openai: "gpt-4o-mini"`; the anthropic and gemini entries carry no +/-
// marker. `.trim()` drops the newline that follows the opening backtick
// and the one before the closing backtick.
export const MODEL_NAME_CHANGE_PATCH = `
@@ -55,7 +55,7 @@
const DEFAULT_MODELS: Record<LLMProvider, string> = {
- openai: "gpt-4o",
+ openai: "gpt-4o-mini",
anthropic: "claude-3-5-sonnet-20241022",
gemini: "gemini-2.5-flash",
};
`.trim();

// Fixture: GitHub PR file list entries
export function makePRFile(
filename: string,
Expand Down
19 changes: 19 additions & 0 deletions __tests__/fixtures/docs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,22 @@ export const UNRELATED_CHANGELOG = `

- Initial release
`.trim();

// Fixture: README with a configuration table mentioning model defaults.
// Sections: a top-level "Knowledge Diff" title, a "Configuration" section
// whose table lists the provider, model and sensitivity inputs (values are
// written as backtick-quoted inline code, hence the escaped backticks), and
// a "How It Works" section. `.trim()` drops the leading/trailing newline.
export const README_WITH_CONFIG_TABLE = `
# Knowledge Diff

A GitHub Action that detects documentation drift.

## Configuration

| Input | Default | Description |
|---|---|---|
| \`llm-provider\` | \`openai\` | LLM backend: \`openai\`, \`anthropic\`, or \`gemini\`. |
| \`llm-model\` | \`gpt-4o\` / \`claude-3-5-sonnet-20241022\` / \`gemini-2.5-flash\` | Override the model. |
| \`sensitivity\` | \`medium\` | Drift threshold: \`low\`, \`medium\`, \`high\`. |

## How It Works

The action parses the PR diff and matches code changes against documentation sections.
`.trim();
1 change: 1 addition & 0 deletions __tests__/pr-commenter.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ function makeChangedFile(overrides?: Partial<ChangedFile>): ChangedFile {
additions: ["+import zustand"],
deletions: ["-import redux"],
changedSymbols: ["useCartStore"],
changedLiterals: [],
tokenEstimate: 50,
...overrides,
};
Expand Down
56 changes: 54 additions & 2 deletions dist/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -79409,6 +79409,42 @@ const JS_KEYWORDS = new Set([
"from", "import", "export", "return", "const", "class", "async",
"await", "true", "false", "null", "undefined", "this", "super",
]);
// ─── String Literal Extraction ────────────────────────────────────────────────
/**
 * Captures quoted string values from changed lines.
 * Matches strings like "gpt-4o-mini", 'openai', "/api/v2/users", etc.
 *
 * - Group 1 is the opening quote; the `\1` backreference requires the same
 *   character as the closing quote, so an apostrophe inside a double-quoted
 *   message (e.g. "don't") is not mistaken for the end of a literal.
 * - Group 2 is the value: at least 3 characters, starting with an
 *   alphanumeric character or "/", which filters punctuation-only values.
 */
const STRING_LITERAL_RE = /(["'])([a-zA-Z0-9/][a-zA-Z0-9_./@:-]{2,})\1/g;
/**
 * Common non-architectural strings to ignore during literal extraction.
 * Lookup is exact and case-sensitive. Entries containing characters the
 * regex above never captures (such as the space in "use strict") cannot be
 * produced in the first place and are listed only for completeness.
 */
const LITERAL_STOPWORDS = new Set([
    "use strict", "utf-8", "utf8", "ascii", "base64", "hex",
    "GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS",
    "get", "post", "put", "delete", "patch", "head", "options",
    "text/plain", "text/html", "application/json",
    "Content-Type", "content-type", "Authorization", "authorization",
    "string", "number", "boolean", "object", "function",
    "node_modules", "package.json", "tsconfig.json",
    "click", "submit", "change", "input", "keydown", "keyup",
    "div", "span", "button", "form", "table",
]);
/**
 * Extract meaningful string literal values from changed lines.
 * These capture configuration values, model names, URLs, library names, etc.
 *
 * @param lines Changed source lines to scan.
 * @returns The distinct literal values (without their quotes) in order of
 *   first appearance, excluding anything listed in LITERAL_STOPWORDS.
 */
function extractStringLiterals(lines) {
    const literals = new Set();
    // Lines are joined with "\n", which the value pattern cannot match, so a
    // literal never spans two lines. matchAll works on a clone of the regex,
    // so the shared module-level regex's lastIndex is never advanced.
    for (const match of lines.join("\n").matchAll(STRING_LITERAL_RE)) {
        const value = match[2];
        if (!LITERAL_STOPWORDS.has(value)) {
            literals.add(value);
        }
    }
    return Array.from(literals);
}
// ─── File Extension Check ─────────────────────────────────────────────────────
function isCodeFile(filePath, allowedExtensions) {
const ext = filePath.split(".").pop()?.toLowerCase() ?? "";
Expand Down Expand Up @@ -79447,13 +79483,16 @@ function parsePRFiles(files, allowedExtensions, maxFiles) {
}
const { additions, deletions } = parsePatchLines(file.patch);
// Only extract symbols from *changed* lines (not context lines)
const changedSymbols = extractSymbols([...additions, ...deletions]);
const changedLines = [...additions, ...deletions];
const changedSymbols = extractSymbols(changedLines);
const changedLiterals = extractStringLiterals(changedLines);
changedFiles.push({
filePath: file.filename,
patch: file.patch,
additions,
deletions,
changedSymbols,
changedLiterals,
tokenEstimate: estimateTokens(file.patch),
});
processed++;
Expand Down Expand Up @@ -112037,6 +112076,8 @@ class LLMClient {
// ─── Shared Constants ─────────────────────────────────────────────────────────
/**
 * Technology keywords that signal architecture intent. Shared across keyword extraction and candidate matching.
 * Matches whole words only (`\b`), case-insensitively, and globally so it can be used with `matchAll`;
 * group 1 is the matched keyword.
 */
const TECH_KEYWORD_RE = /\b(redux|zustand|mobx|recoil|jotai|react|vue|angular|express|fastapi|django|rails|postgres|mysql|mongodb|graphql|rest|grpc|websocket|kafka|rabbitmq|redis|docker|kubernetes|aws|gcp|azure)\b/gi;
/**
 * Captures meaningful quoted string values from documentation content (model names, config values, etc.).
 * Accepts double quotes, single quotes or backticks as delimiters; group 1 is the value, which must be
 * at least 3 characters long and start with an alphanumeric character.
 *
 * NOTE(review): the opening and closing delimiters are not required to be the same character.
 * NOTE(review): unlike STRING_LITERAL_RE, the first character may not be "/", so path-like values
 * such as `/api/v1/users` are not captured here — confirm whether that is intentional.
 */
const DOC_STRING_LITERAL_RE = /["'`]([a-zA-Z0-9][a-zA-Z0-9_./@:-]{2,})["'`]/g;
// ─── Markdown Section Splitting ───────────────────────────────────────────────
// Markdown ATX heading: group 1 is the run of 1–6 "#" characters, group 2 the heading text.
// No `m` flag, so `^`/`$` anchor to the whole input — presumably applied to one line at a time; verify at the call site.
const HEADING_RE = /^(#{1,6})\s+(.+)$/;
/**
Expand Down Expand Up @@ -112135,6 +112176,11 @@ function extractKeywords(heading, content) {
for (const m of content.matchAll(TECH_KEYWORD_RE)) {
kw.add(m[1].toLowerCase());
}
// Quoted string values in documentation (model names, config values, URLs, etc.)
// These are critical for matching diffs that change string literal values.
for (const m of content.matchAll(DOC_STRING_LITERAL_RE)) {
kw.add(m[1].toLowerCase());
}
return Array.from(kw);
}
function buildIndex(docFiles) {
Expand Down Expand Up @@ -112182,6 +112228,12 @@ function findCandidateSections(changedFile, index, topN = 3) {
for (const m of changeText.matchAll(TECH_KEYWORD_RE)) {
queryTerms.add(m[1].toLowerCase());
}
// String literal values from the diff (model names, config values, URLs, etc.)
if (changedFile.changedLiterals) {
for (const lit of changedFile.changedLiterals) {
queryTerms.add(lit.toLowerCase());
}
}
// Score sections by how many query terms they match
for (const term of queryTerms) {
const sections = index.get(term) ?? [];
Expand Down Expand Up @@ -112270,7 +112322,7 @@ class DriftDetector {
let totalCandidates = 0;
for (const changedFile of changedFiles) {
info(`Analysing: ${changedFile.filePath}`);
const candidates = findCandidateSections(changedFile, docIndex, 3);
const candidates = findCandidateSections(changedFile, docIndex, 6);
totalCandidates += candidates.length;
if (candidates.length === 0) {
core_debug(` No candidate doc sections found for ${changedFile.filePath}`);
Expand Down
2 changes: 1 addition & 1 deletion dist/index.js.map

Large diffs are not rendered by default.

47 changes: 46 additions & 1 deletion src/diff-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,48 @@ const JS_KEYWORDS = new Set([
"await", "true", "false", "null", "undefined", "this", "super",
]);

// ─── String Literal Extraction ────────────────────────────────────────────────

/**
 * Captures quoted string values from changed lines.
 * Matches strings like "gpt-4o-mini", 'openai', "/api/v2/users", etc.
 *
 * - Group 1 is the opening quote; the `\1` backreference requires the same
 *   character as the closing quote, so an apostrophe inside a double-quoted
 *   message (e.g. "don't") is not mistaken for the end of a literal.
 * - Group 2 is the value: at least 3 characters, starting with an
 *   alphanumeric character or "/", which filters punctuation-only values.
 */
const STRING_LITERAL_RE = /(["'])([a-zA-Z0-9/][a-zA-Z0-9_./@:-]{2,})\1/g;

/**
 * Common non-architectural strings to ignore during literal extraction.
 * Lookup is exact and case-sensitive. Entries containing characters the
 * regex above never captures (such as the space in "use strict") cannot be
 * produced in the first place and are listed only for completeness.
 */
const LITERAL_STOPWORDS = new Set([
  "use strict", "utf-8", "utf8", "ascii", "base64", "hex",
  "GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS",
  "get", "post", "put", "delete", "patch", "head", "options",
  "text/plain", "text/html", "application/json",
  "Content-Type", "content-type", "Authorization", "authorization",
  "string", "number", "boolean", "object", "function",
  "node_modules", "package.json", "tsconfig.json",
  "click", "submit", "change", "input", "keydown", "keyup",
  "div", "span", "button", "form", "table",
]);

/**
 * Extract meaningful string literal values from changed lines.
 * These capture configuration values, model names, URLs, library names, etc.
 *
 * @param lines Changed source lines to scan.
 * @returns The distinct literal values (without their quotes) in order of
 *   first appearance, excluding anything listed in LITERAL_STOPWORDS.
 */
export function extractStringLiterals(lines: string[]): string[] {
  const literals = new Set<string>();

  // Lines are joined with "\n", which the value pattern cannot match, so a
  // literal never spans two lines. matchAll works on a clone of the regex,
  // so the shared module-level regex's lastIndex is never advanced.
  for (const match of lines.join("\n").matchAll(STRING_LITERAL_RE)) {
    const value = match[2];
    if (!LITERAL_STOPWORDS.has(value)) {
      literals.add(value);
    }
  }

  return Array.from(literals);
}

// ─── File Extension Check ─────────────────────────────────────────────────────

export function isCodeFile(
Expand Down Expand Up @@ -125,14 +167,17 @@ export function parsePRFiles(
const { additions, deletions } = parsePatchLines(file.patch);

// Only extract symbols from *changed* lines (not context lines)
const changedSymbols = extractSymbols([...additions, ...deletions]);
const changedLines = [...additions, ...deletions];
const changedSymbols = extractSymbols(changedLines);
const changedLiterals = extractStringLiterals(changedLines);

changedFiles.push({
filePath: file.filename,
patch: file.patch,
additions,
deletions,
changedSymbols,
changedLiterals,
tokenEstimate: estimateTokens(file.patch),
});

Expand Down
Loading