From 56a7c567f0f44e804c0dbcf49c3b6541720fb8f4 Mon Sep 17 00:00:00 2001 From: Ben Vinegar Date: Thu, 14 May 2026 23:39:12 -0400 Subject: [PATCH] refactor: extract shared patch helpers --- src/core/diffFile.ts | 85 +++ src/core/git.ts | 34 ++ src/core/loaders.ts | 522 +----------------- src/core/pager.ts | 10 +- src/core/patch/chunks.ts | 69 +++ src/core/patch/gitFormat.ts | 260 +++++++++ .../gitLog.test.ts} | 2 +- src/core/patch/gitLog.ts | 57 ++ src/core/patch/normalize.ts | 18 + src/core/vcs/git.ts | 19 +- src/opentui/HunkDiffView.test.tsx | 14 + src/opentui/model.ts | 81 +-- 12 files changed, 555 insertions(+), 616 deletions(-) create mode 100644 src/core/diffFile.ts create mode 100644 src/core/patch/chunks.ts create mode 100644 src/core/patch/gitFormat.ts rename src/core/{loaders.gitLog.test.ts => patch/gitLog.test.ts} (99%) create mode 100644 src/core/patch/gitLog.ts create mode 100644 src/core/patch/normalize.ts diff --git a/src/core/diffFile.ts b/src/core/diffFile.ts new file mode 100644 index 00000000..a19ed25d --- /dev/null +++ b/src/core/diffFile.ts @@ -0,0 +1,85 @@ +import { getFiletypeFromFileName, type FileDiffMetadata } from "@pierre/diffs"; +import { findAgentFileContext } from "./agent"; +import { patchLooksBinary } from "./binary"; +import { normalizeDiffMetadataPaths, normalizeDiffPath } from "./diffPaths"; +import type { AgentContext, DiffFile } from "./types"; + +/** Count visible additions and deletions from parsed diff metadata. */ +export function countDiffStats(metadata: FileDiffMetadata) { + let additions = 0; + let deletions = 0; + + for (const hunk of metadata.hunks) { + for (const content of hunk.hunkContent) { + if (content.type === "change") { + additions += content.additions; + deletions += content.deletions; + } + } + } + + return { additions, deletions }; +} + +export interface BuildDiffFileOptions { + isUntracked?: boolean; + previousPath?: string; + isBinary?: boolean; + isTooLarge?: boolean; + stats?: DiffFile["stats"]; + statsTruncated?: boolean; +} + +/** Build the normalized per-file model used by the UI regardless of input mode. */ +export function buildDiffFile( + metadata: FileDiffMetadata, + patch: string, + index: number, + sourcePrefix: string, + agentContext: AgentContext | null, + { + isUntracked, + previousPath, + isBinary, + isTooLarge, + stats, + statsTruncated, + }: BuildDiffFileOptions = {}, +): DiffFile { + const normalizedMetadata = normalizeDiffMetadataPaths(metadata); + const path = normalizedMetadata.name; + const resolvedPreviousPath = normalizeDiffPath(previousPath) ?? normalizedMetadata.prevName; + + return { + id: `${sourcePrefix}:${index}:${path}`, + path, + previousPath: resolvedPreviousPath, + patch, + language: getFiletypeFromFileName(path) ?? undefined, + stats: stats ?? countDiffStats(normalizedMetadata), + metadata: normalizedMetadata, + agent: findAgentFileContext(agentContext, path, resolvedPreviousPath), + isUntracked, + isBinary: isBinary ?? patchLooksBinary(patch), + isTooLarge, + statsTruncated, + }; +} + +/** Build placeholder metadata for a file whose full diff would be too expensive. */ +export function createSkippedLargeMetadata( + filePath: string, + type: FileDiffMetadata["type"], +): FileDiffMetadata { + return { + name: filePath, + type, + hunks: [], + splitLineCount: 0, + unifiedLineCount: 0, + isPartial: true, + additionLines: [], + deletionLines: [], + cacheKey: `${filePath}:large-diff-skipped`, + }; +} diff --git a/src/core/git.ts b/src/core/git.ts index f2f6243a..6ac8e05a 100644 --- a/src/core/git.ts +++ b/src/core/git.ts @@ -450,6 +450,40 @@ export function listGitUntrackedFiles( ); } +/** Escape only the filename characters that break unified-diff header parsing. */ +function escapeUntrackedPatchPath(path: string) { + return path + .replaceAll("\\", "\\\\") + .replaceAll("\t", "\\t") + .replaceAll("\n", "\\n") + .replaceAll("\r", "\\r"); +} + +/** Rewrite Git's quoted untracked-file headers into parser-friendly paths. */ +export function normalizeUntrackedPatchHeaders(patchText: string, filePath: string) { + const safePath = escapeUntrackedPatchPath(filePath); + + return patchText + .replaceAll("\r\n", "\n") + .split("\n") + .map((line) => { + if (line.startsWith("diff --git ")) { + return `diff --git a/${safePath} b/${safePath}`; + } + + if (line.startsWith("+++ ")) { + return `+++ b/${safePath}`; + } + + if (line.startsWith("Binary files /dev/null and ")) { + return `Binary files /dev/null and b/${safePath} differ`; + } + + return line; + }) + .join("\n"); +} + /** Return the raw Git patch text for one untracked file using `git diff --no-index`. */ export function runGitUntrackedFileDiffText( input: VcsCommandInput, diff --git a/src/core/loaders.ts b/src/core/loaders.ts index a2131287..36899752 100644 --- a/src/core/loaders.ts +++ b/src/core/loaders.ts @@ -1,5 +1,4 @@ import { - getFiletypeFromFileName, parseDiffFromFile, parsePatchFiles, type FileContents, @@ -9,9 +8,11 @@ import { createTwoFilesPatch } from "diff"; import fs from "node:fs"; import { join, resolve as resolvePath } from "node:path"; import { findAgentFileContext, loadAgentContext } from "./agent"; -import { createSkippedBinaryMetadata, isProbablyBinaryFile, patchLooksBinary } from "./binary"; -import { normalizeDiffMetadataPaths, normalizeDiffPath } from "./diffPaths"; -import { runGitUntrackedFileDiffText } from "./git"; +import { createSkippedBinaryMetadata, isProbablyBinaryFile } from "./binary"; +import { normalizeUntrackedPatchHeaders, runGitUntrackedFileDiffText } from "./git"; +import { splitPatchIntoFileChunks, findPatchChunk } from "./patch/chunks"; +import { buildDiffFile, createSkippedLargeMetadata } from "./diffFile"; +import { normalizePatchText } from "./patch/normalize"; import { createUnsupportedVcsOperationError, getVcsAdapter, operationFromInput } from "./vcs"; import type { AppBootstrap, @@ -40,497 +41,6 @@ function basename(path: string) { return path.split(/[\\/]/).filter(Boolean).pop() ?? path; } -/** Remove git-style a/ and b/ prefixes before matching diff paths. */ -function stripPrefixes(path: string) { - return path.replace(/^[ab]\//, ""); -} - -/** Remove terminal escape sequences so Git-colored pager input still parses as plain patch text. */ -function stripTerminalControl(text: string) { - return text - .replace(/\x1bP[\s\S]*?\x1b\\/g, "") - .replace(/\x1b\][\s\S]*?(?:\x07|\x1b\\)/g, "") - .replace(/\x1b\[[0-?]*[ -/]*[@-~]/g, "") - .replace(/\x1b[@-_]/g, ""); -} - -/** - * Strip `git log -p` / `git show -p` commit metadata so the surviving text - * is a plain patch stream that `@pierre/diffs` can parse without spamming - * `parseLineType: Invalid firstChar` warnings on every commit boundary. - * - * Each commit in `git log -p` looks like: - * - * ``` - * commit [ (refs)] - * Author: ... - * Date: ... - * - * - * - * diff --git a/foo b/foo - * ... - * ``` - * - * Lines from `commit ` through the first patch header (`diff --git `, - * `--- `, or `+++ `) are dropped. Hunk-body lines always start with - * `+`, `-`, ` ` or `\`, so a real context line that begins with the word - * "commit" is unaffected (its leading space prevents the regex match). - * - * Returns the input unchanged when no `commit ` boundary is present, - * keeping the regular patch path zero-cost. - */ -export function stripGitLogMetadata(text: string) { - // Hex range up to 64 covers both SHA-1 (40) and SHA-256 (64) repos. - const COMMIT_BOUNDARY = /^commit [0-9a-f]{4,64}(?: |$)/m; - if (!COMMIT_BOUNDARY.test(text)) { - return text; - } - - const lines = text.split("\n"); - const out: string[] = []; - let inHeader = false; - - for (const line of lines) { - if (COMMIT_BOUNDARY.test(line)) { - inHeader = true; - continue; - } - if (inHeader) { - // The header section ends at the first patch line. `diff --git ` - // is the canonical Git start; `--- `/`+++ ` cover unified-diff - // input where someone synthesised log output without it. - if (line.startsWith("diff --git ") || line.startsWith("--- ") || line.startsWith("+++ ")) { - inHeader = false; - out.push(line); - } - continue; - } - out.push(line); - } - - return out.join("\n"); -} - -/** Split a multi-file patch into per-file chunks so each diff file keeps its original patch text. */ -function splitPatchIntoFileChunks(rawPatch: string) { - const patch = rawPatch.replaceAll("\r\n", "\n"); - const lines = patch.split("\n"); - const chunks: string[] = []; - let current: string[] = []; - const hasGitHeaders = lines.some((line) => line.startsWith("diff --git ")); - - const flush = () => { - if (current.length > 0) { - chunks.push(`${current.join("\n").trimEnd()}\n`); - current = []; - } - }; - - for (let index = 0; index < lines.length; index += 1) { - const line = lines[index]!; - - if (hasGitHeaders && line.startsWith("diff --git ")) { - flush(); - current.push(line); - continue; - } - - if (!hasGitHeaders && line.startsWith("--- ") && lines[index + 1]?.startsWith("+++ ")) { - flush(); - current.push(line); - current.push(lines[index + 1]!); - index += 1; - continue; - } - - if (current.length > 0) { - current.push(line); - } - } - - flush(); - return chunks; -} - -/** Count visible additions and deletions from parsed diff metadata. */ -function countDiffStats(metadata: FileDiffMetadata) { - let additions = 0; - let deletions = 0; - - for (const hunk of metadata.hunks) { - for (const content of hunk.hunkContent) { - if (content.type === "change") { - additions += content.additions; - deletions += content.deletions; - } - } - } - - return { additions, deletions }; -} - -/** Recover the original patch chunk for one parsed file, preferring index order before path matching. */ -function findPatchChunk(metadata: FileDiffMetadata, chunks: string[], index: number) { - const byIndex = chunks[index]; - if (byIndex) { - return byIndex; - } - - return ( - chunks.find((chunk) => - [metadata.name, metadata.prevName] - .map(normalizeDiffPath) - .filter((value): value is string => Boolean(value)) - .map(stripPrefixes) - .some( - (path) => - chunk.includes(`a/${path}`) || chunk.includes(`b/${path}`) || chunk.includes(path), - ), - ) ?? "" - ); -} - -interface BuildDiffFileOptions { - isUntracked?: boolean; - previousPath?: string; - isBinary?: boolean; - isTooLarge?: boolean; - stats?: DiffFile["stats"]; - statsTruncated?: boolean; -} - -/** Build the normalized per-file model used by the UI regardless of input mode. */ -function buildDiffFile( - metadata: FileDiffMetadata, - patch: string, - index: number, - sourcePrefix: string, - agentContext: AgentContext | null, - { - isUntracked, - previousPath, - isBinary, - isTooLarge, - stats, - statsTruncated, - }: BuildDiffFileOptions = {}, -): DiffFile { - const normalizedMetadata = normalizeDiffMetadataPaths(metadata); - const path = normalizedMetadata.name; - const resolvedPreviousPath = normalizeDiffPath(previousPath) ?? normalizedMetadata.prevName; - - return { - id: `${sourcePrefix}:${index}:${path}`, - path, - previousPath: resolvedPreviousPath, - patch, - language: getFiletypeFromFileName(path) ?? undefined, - stats: stats ?? countDiffStats(normalizedMetadata), - metadata: normalizedMetadata, - agent: findAgentFileContext(agentContext, path, resolvedPreviousPath), - isUntracked, - isBinary: isBinary ?? patchLooksBinary(patch), - isTooLarge, - statsTruncated, - }; -} - -/** - * Re-add Git's `a/` and `b/` path prefixes to patch headers when stdin came from a - * `git diff` that was emitted with `diff.noprefix=true` (or otherwise stripped prefixes). - * - * `@pierre/diffs` requires `a/` and `b/` on `diff --git`, `---`, and `+++` lines and throws - * a `TypeError` on the first noprefix header, which leaves the review with zero files. The - * git-backed paths force `diff.noprefix=false` when they invoke git internally; this helper - * covers the patch path (`hunk patch`, `hunk pager`) where the input was produced by an - * outer `git` process we do not control. - * - * The rewrite is scoped to header lines only: once the `+++ ` line has been emitted for a - * block we clear the flag so a deleted line whose content starts with `-- ` (e.g. a removed - * SQL/Lua/Haskell comment, which becomes `--- foo` on disk) is not mistaken for a file - * header inside the hunk body. - */ -type GitHeaderRewriteMode = "add" | "strip"; - -function normalizeGitPatchPrefixes(patchText: string) { - if (!patchText.includes("diff --git ")) { - return patchText; - } - - const lines = patchText.split("\n"); - const normalizedLines: string[] = []; - let blockLines: string[] = []; - - const flushBlock = () => { - if (blockLines.length === 0) { - return; - } - - for (const line of rewriteGitPatchBlock(blockLines)) { - normalizedLines.push(line); - } - blockLines = []; - }; - - for (const line of lines) { - if (line.startsWith("diff --git ")) { - flushBlock(); - blockLines.push(line); - continue; - } - - if (blockLines.length > 0) { - blockLines.push(line); - } else { - normalizedLines.push(line); - } - } - - flushBlock(); - return normalizedLines.join("\n"); -} - -/** Rewrite one `diff --git` block, keeping file-header rewrites out of hunk bodies. */ -function rewriteGitPatchBlock(blockLines: string[]) { - const firstLine = blockLines[0]; - if (!firstLine?.startsWith("diff --git ")) { - return blockLines; - } - - const result = rewriteGitDiffHeader(firstLine, blockLines); - let blockRewriteMode = result.rewriteMode; - - const rewrittenLines = [result.line]; - - for (const line of blockLines.slice(1)) { - if (blockRewriteMode && line.startsWith("--- ")) { - rewrittenLines.push(rewriteUnifiedFileLine(line, "--- ", "a/", blockRewriteMode)); - continue; - } - - if (blockRewriteMode && line.startsWith("+++ ")) { - const rewriteMode = blockRewriteMode; - blockRewriteMode = null; - rewrittenLines.push(rewriteUnifiedFileLine(line, "+++ ", "b/", rewriteMode)); - continue; - } - - rewrittenLines.push(line); - } - - return rewrittenLines; -} - -/** Detect prefixed/noprefix `diff --git` lines and rewrite them into Pierre's `a/X b/Y` form. */ -function rewriteGitDiffHeader( - line: string, - blockLines: string[], -): { - line: string; - rewriteMode: GitHeaderRewriteMode | null; -} { - const rest = line.slice("diff --git ".length).trimEnd(); - - const quotedMatch = rest.match(/^"((?:\\.|[^"\\])*)" "((?:\\.|[^"\\])*)"$/); - if (quotedMatch) { - const [, oldPath = "", newPath = ""] = quotedMatch; - const pair = canonicalizeGitPathPair(oldPath, newPath, blockLines); - // Pierre's git header parser does not currently handle the quoted `"a/..." "b/..."` - // form, so canonicalize quoted paths to the unquoted form even when prefixes exist. - return { line: `diff --git ${pair.oldPath} ${pair.newPath}`, rewriteMode: pair.rewriteMode }; - } - - const tokens = rest.split(" "); - - if (tokens.length >= 2 && tokens.length % 2 === 0) { - const half = tokens.length / 2; - const firstHalf = tokens.slice(0, half).join(" "); - const secondHalf = tokens.slice(half).join(" "); - const knownPair = canonicalizeKnownGitPathPair(firstHalf, secondHalf, blockLines); - - if (knownPair?.changed) { - return { - line: `diff --git ${knownPair.oldPath} ${knownPair.newPath}`, - rewriteMode: knownPair.rewriteMode, - }; - } - - // Already prefixed: `a/X b/Y` (covers single-token and equally split multi-token paths). - if (knownPair?.isCanonical) { - return { line, rewriteMode: null }; - } - - // Non-rename noprefix: identical halves regardless of whether the path contains spaces. - if (firstHalf === secondHalf && firstHalf.length > 0) { - return { line: `diff --git a/${firstHalf} b/${secondHalf}`, rewriteMode: "add" }; - } - } - - // Two-token rename without prefix and without spaces in either path. - if (tokens.length === 2 && tokens[0] && tokens[1]) { - return { line: `diff --git a/${tokens[0]} b/${tokens[1]}`, rewriteMode: "add" }; - } - - // Genuinely ambiguous (rename with spaces and no quoting). Leave untouched and let the - // parser surface the existing failure rather than guess at the path split. - return { line, rewriteMode: null }; -} - -const GIT_MNEMONIC_PREFIXES = new Set(["c", "i", "o", "w", "1", "2"]); - -/** Return one Git mnemonic side prefix from a path, if present. */ -function splitGitMnemonicPrefix(path: string) { - const [prefix, ...rest] = path.split("/"); - if (!prefix || rest.length === 0 || !GIT_MNEMONIC_PREFIXES.has(prefix)) { - return null; - } - - return { prefix, path: rest.join("/") }; -} - -/** Remove Git's outer quotes from one path-like metadata value. */ -function stripGitPathQuotes(path: string) { - return path.match(/^"((?:\\.|[^"\\])*)"$/)?.[1] ?? path; -} - -/** Return rename metadata, which Git writes without mnemonic side prefixes. */ -function findRenameMetadata(blockLines: string[]) { - const oldPath = blockLines.find((line) => line.startsWith("rename from ")); - const newPath = blockLines.find((line) => line.startsWith("rename to ")); - - if (!oldPath || !newPath) { - return null; - } - - return { - oldPath: stripGitPathQuotes(oldPath.slice("rename from ".length)), - newPath: stripGitPathQuotes(newPath.slice("rename to ".length)), - }; -} - -/** Return a path with the expected Git side prefix while avoiding double-prefixing. */ -function withGitPrefix(path: string, prefix: "a/" | "b/") { - return path.startsWith(prefix) ? path : `${prefix}${path}`; -} - -/** Decide whether a mnemonic-looking path pair is real mnemonic output or a noprefix rename. */ -function shouldStripMnemonicPair(oldPath: string, newPath: string, blockLines: string[]) { - const oldMnemonic = splitGitMnemonicPrefix(oldPath); - const newMnemonic = splitGitMnemonicPrefix(newPath); - - if (!oldMnemonic || !newMnemonic || oldMnemonic.prefix === newMnemonic.prefix) { - return null; - } - - const rename = findRenameMetadata(blockLines); - if (!rename) { - return true; - } - - if (rename.oldPath === oldPath && rename.newPath === newPath) { - return false; - } - - if (rename.oldPath === oldMnemonic.path && rename.newPath === newMnemonic.path) { - return true; - } - - return true; -} - -/** Convert already-prefixed or mnemonic-prefixed path pairs into Pierre's canonical shape. */ -function canonicalizeKnownGitPathPair(oldPath: string, newPath: string, blockLines: string[]) { - const oldMnemonic = splitGitMnemonicPrefix(oldPath); - const newMnemonic = splitGitMnemonicPrefix(newPath); - const isCanonical = oldPath.startsWith("a/") && newPath.startsWith("b/"); - - if (isCanonical) { - return { oldPath, newPath, rewriteMode: "add" as const, changed: false, isCanonical: true }; - } - - if (oldMnemonic && newMnemonic && shouldStripMnemonicPair(oldPath, newPath, blockLines)) { - return { - oldPath: `a/${oldMnemonic.path}`, - newPath: `b/${newMnemonic.path}`, - rewriteMode: "strip" as const, - changed: true, - isCanonical: false, - }; - } - - return null; -} - -/** Convert one quoted `diff --git` path pair into Pierre's canonical side-prefix shape. */ -function canonicalizeGitPathPair(oldPath: string, newPath: string, blockLines: string[]) { - return ( - canonicalizeKnownGitPathPair(oldPath, newPath, blockLines) ?? { - oldPath: withGitPrefix(oldPath, "a/"), - newPath: withGitPrefix(newPath, "b/"), - rewriteMode: "add" as const, - changed: true, - isCanonical: false, - } - ); -} - -/** Insert the canonical `a/` or `b/` prefix on a unified-diff header that is missing it. */ -function rewriteUnifiedFileLine( - line: string, - marker: "--- " | "+++ ", - prefix: "a/" | "b/", - mode: GitHeaderRewriteMode, -) { - const path = line.slice(marker.length); - const quotedPath = path.match(/^"((?:\\.|[^"\\])*)"(.*)$/); - const pathName = quotedPath?.[1] ?? path; - const suffix = quotedPath?.[2] ?? ""; - - if (pathName === "/dev/null" || pathName.startsWith("/dev/null\t")) { - return line; - } - - const normalizedPath = - mode === "strip" ? (splitGitMnemonicPrefix(pathName)?.path ?? pathName) : pathName; - - return `${marker}${withGitPrefix(normalizedPath, prefix)}${suffix}`; -} - -/** Escape only the filename characters that break unified-diff header parsing. */ -function escapeUntrackedPatchPath(path: string) { - return path - .replaceAll("\\", "\\\\") - .replaceAll("\t", "\\t") - .replaceAll("\n", "\\n") - .replaceAll("\r", "\\r"); -} - -/** Rewrite Git's quoted untracked-file headers into parser-friendly paths. */ -function normalizeUntrackedPatchHeaders(patchText: string, filePath: string) { - const safePath = escapeUntrackedPatchPath(filePath); - - return patchText - .replaceAll("\r\n", "\n") - .split("\n") - .map((line) => { - if (line.startsWith("diff --git ")) { - return `diff --git a/${safePath} b/${safePath}`; - } - - if (line.startsWith("+++ ")) { - return `+++ b/${safePath}`; - } - - if (line.startsWith("Binary files /dev/null and ")) { - return `Binary files /dev/null and b/${safePath} differ`; - } - - return line; - }) - .join("\n"); -} - interface CountedLines { complete: boolean; lines: number; @@ -606,24 +116,6 @@ function inspectLargeUntrackedFile(repoRoot: string, filePath: string): LargeUnt }; } -/** Build placeholder metadata for a file whose full diff would be too expensive. */ -function createSkippedLargeMetadata( - filePath: string, - type: FileDiffMetadata["type"], -): FileDiffMetadata { - return { - name: filePath, - type, - hunks: [], - splitLineCount: 0, - unifiedLineCount: 0, - isPartial: true, - additionLines: [], - deletionLines: [], - cacheKey: `${filePath}:large-diff-skipped`, - }; -} - /** Parse one synthetic untracked-file patch and reattach the real path after header normalization. */ function parseUntrackedPatchFile(patchText: string, filePath: string) { let parsedPatches: ReturnType; @@ -739,9 +231,7 @@ function normalizePatchChangeset( sourceLabel: string, agentContext: AgentContext | null, ): Changeset { - const normalizedPatchText = normalizeGitPatchPrefixes( - stripGitLogMetadata(stripTerminalControl(patchText.replaceAll("\r\n", "\n"))), - ); + const normalizedPatchText = normalizePatchText(patchText); let parsedPatches: ReturnType; try { diff --git a/src/core/pager.ts b/src/core/pager.ts index 67b574c1..8b146218 100644 --- a/src/core/pager.ts +++ b/src/core/pager.ts @@ -1,14 +1,6 @@ import { spawn, type ChildProcess, type SpawnOptions } from "node:child_process"; import { once } from "node:events"; - -/** Remove terminal escape sequences before deciding whether stdin looks like a patch. */ -function stripTerminalControl(text: string) { - return text - .replace(/\x1bP[\s\S]*?\x1b\\/g, "") - .replace(/\x1b\][\s\S]*?(?:\x07|\x1b\\)/g, "") - .replace(/\x1b\[[0-?]*[ -/]*[@-~]/g, "") - .replace(/\x1b[@-_]/g, ""); -} +import { stripTerminalControl } from "./patch/normalize"; /** Detect whether generic pager stdin looks like a diff/patch that Hunk should review. */ export function looksLikePatchInput(text: string) { diff --git a/src/core/patch/chunks.ts b/src/core/patch/chunks.ts new file mode 100644 index 00000000..c735cd6f --- /dev/null +++ b/src/core/patch/chunks.ts @@ -0,0 +1,69 @@ +import type { FileDiffMetadata } from "@pierre/diffs"; +import { normalizeDiffPath } from "../diffPaths"; + +/** Remove git-style a/ and b/ prefixes before matching diff paths. */ +function stripPrefixes(path: string) { + return path.replace(/^[ab]\//, ""); +} + +/** Split a multi-file patch into per-file chunks so each diff file keeps its original patch text. */ +export function splitPatchIntoFileChunks(rawPatch: string) { + const patch = rawPatch.replaceAll("\r\n", "\n"); + const lines = patch.split("\n"); + const chunks: string[] = []; + let current: string[] = []; + const hasGitHeaders = lines.some((line) => line.startsWith("diff --git ")); + + const flush = () => { + if (current.length > 0) { + chunks.push(`${current.join("\n").trimEnd()}\n`); + current = []; + } + }; + + for (let index = 0; index < lines.length; index += 1) { + const line = lines[index]!; + + if (hasGitHeaders && line.startsWith("diff --git ")) { + flush(); + current.push(line); + continue; + } + + if (!hasGitHeaders && line.startsWith("--- ") && lines[index + 1]?.startsWith("+++ ")) { + flush(); + current.push(line); + current.push(lines[index + 1]!); + index += 1; + continue; + } + + if (current.length > 0) { + current.push(line); + } + } + + flush(); + return chunks; +} + +/** Recover the original patch chunk for one parsed file, preferring index order before path matching. */ +export function findPatchChunk(metadata: FileDiffMetadata, chunks: string[], index: number) { + const byIndex = chunks[index]; + if (byIndex) { + return byIndex; + } + + return ( + chunks.find((chunk) => + [metadata.name, metadata.prevName] + .map(normalizeDiffPath) + .filter((value): value is string => Boolean(value)) + .map(stripPrefixes) + .some( + (path) => + chunk.includes(`a/${path}`) || chunk.includes(`b/${path}`) || chunk.includes(path), + ), + ) ?? "" + ); +} diff --git a/src/core/patch/gitFormat.ts b/src/core/patch/gitFormat.ts new file mode 100644 index 00000000..d7278ab3 --- /dev/null +++ b/src/core/patch/gitFormat.ts @@ -0,0 +1,260 @@ +/** + * Helpers for normalizing Git-format patch syntax. + * + * These helpers are not tied to Git repositories: Jujutsu and other VCS backends can emit + * the same `diff --git` patch format, so the app loader and public OpenTUI API share them. + */ + +/** + * Canonicalize Git-format patch headers into the `a/` and `b/` side prefixes Pierre expects. + * + * This covers patch text produced outside Hunk's controlled VCS commands, where user config or + * another tool may emit noprefix, mnemonic-prefix, or quoted `diff --git` paths. Rewrites are + * intentionally limited to each file header block and stop after the `+++ ` file header so hunk + * body lines that merely look like file headers are preserved verbatim. + */ +type GitHeaderRewriteMode = "add" | "strip"; + +export function normalizeGitPatchPrefixes(patchText: string) { + if (!patchText.includes("diff --git ")) { + return patchText; + } + + const lines = patchText.split("\n"); + const normalizedLines: string[] = []; + let blockLines: string[] = []; + + const flushBlock = () => { + if (blockLines.length === 0) { + return; + } + + for (const line of rewriteGitPatchBlock(blockLines)) { + normalizedLines.push(line); + } + blockLines = []; + }; + + for (const line of lines) { + if (line.startsWith("diff --git ")) { + flushBlock(); + blockLines.push(line); + continue; + } + + if (blockLines.length > 0) { + blockLines.push(line); + } else { + normalizedLines.push(line); + } + } + + flushBlock(); + return normalizedLines.join("\n"); +} + +/** Rewrite one `diff --git` block, keeping file-header rewrites out of hunk bodies. */ +function rewriteGitPatchBlock(blockLines: string[]) { + const firstLine = blockLines[0]; + if (!firstLine?.startsWith("diff --git ")) { + return blockLines; + } + + const result = rewriteGitDiffHeader(firstLine, blockLines); + let blockRewriteMode = result.rewriteMode; + + const rewrittenLines = [result.line]; + + for (const line of blockLines.slice(1)) { + if (blockRewriteMode && line.startsWith("--- ")) { + rewrittenLines.push(rewriteUnifiedFileLine(line, "--- ", "a/", blockRewriteMode)); + continue; + } + + if (blockRewriteMode && line.startsWith("+++ ")) { + const rewriteMode = blockRewriteMode; + blockRewriteMode = null; + rewrittenLines.push(rewriteUnifiedFileLine(line, "+++ ", "b/", rewriteMode)); + continue; + } + + rewrittenLines.push(line); + } + + return rewrittenLines; +} + +/** Detect prefixed/noprefix `diff --git` lines and rewrite them into Pierre's `a/X b/Y` form. */ +function rewriteGitDiffHeader( + line: string, + blockLines: string[], +): { + line: string; + rewriteMode: GitHeaderRewriteMode | null; +} { + const rest = line.slice("diff --git ".length).trimEnd(); + + const quotedMatch = rest.match(/^"((?:\\.|[^"\\])*)" "((?:\\.|[^"\\])*)"$/); + if (quotedMatch) { + const [, oldPath = "", newPath = ""] = quotedMatch; + const pair = canonicalizeGitPathPair(oldPath, newPath, blockLines); + // Pierre's git header parser does not currently handle the quoted `"a/..." "b/..."` + // form, so canonicalize quoted paths to the unquoted form even when prefixes exist. + return { line: `diff --git ${pair.oldPath} ${pair.newPath}`, rewriteMode: pair.rewriteMode }; + } + + const tokens = rest.split(" "); + + if (tokens.length >= 2 && tokens.length % 2 === 0) { + const half = tokens.length / 2; + const firstHalf = tokens.slice(0, half).join(" "); + const secondHalf = tokens.slice(half).join(" "); + const knownPair = canonicalizeKnownGitPathPair(firstHalf, secondHalf, blockLines); + + if (knownPair?.changed) { + return { + line: `diff --git ${knownPair.oldPath} ${knownPair.newPath}`, + rewriteMode: knownPair.rewriteMode, + }; + } + + // Already prefixed: `a/X b/Y` (covers single-token and equally split multi-token paths). + if (knownPair?.isCanonical) { + return { line, rewriteMode: null }; + } + + // Non-rename noprefix: identical halves regardless of whether the path contains spaces. + if (firstHalf === secondHalf && firstHalf.length > 0) { + return { line: `diff --git a/${firstHalf} b/${secondHalf}`, rewriteMode: "add" }; + } + } + + // Two-token rename without prefix and without spaces in either path. + if (tokens.length === 2 && tokens[0] && tokens[1]) { + return { line: `diff --git a/${tokens[0]} b/${tokens[1]}`, rewriteMode: "add" }; + } + + // Genuinely ambiguous (rename with spaces and no quoting). Leave untouched and let the + // parser surface the existing failure rather than guess at the path split. + return { line, rewriteMode: null }; +} + +const GIT_MNEMONIC_PREFIXES = new Set(["c", "i", "o", "w", "1", "2"]); + +/** Return one Git mnemonic side prefix from a path, if present. */ +function splitGitMnemonicPrefix(path: string) { + const [prefix, ...rest] = path.split("/"); + if (!prefix || rest.length === 0 || !GIT_MNEMONIC_PREFIXES.has(prefix)) { + return null; + } + + return { prefix, path: rest.join("/") }; +} + +/** Remove Git's outer quotes from one path-like metadata value. */ +function stripGitPathQuotes(path: string) { + return path.match(/^"((?:\\.|[^"\\])*)"$/)?.[1] ?? path; +} + +/** Return rename metadata, which Git writes without mnemonic side prefixes. */ +function findRenameMetadata(blockLines: string[]) { + const oldPath = blockLines.find((line) => line.startsWith("rename from ")); + const newPath = blockLines.find((line) => line.startsWith("rename to ")); + + if (!oldPath || !newPath) { + return null; + } + + return { + oldPath: stripGitPathQuotes(oldPath.slice("rename from ".length)), + newPath: stripGitPathQuotes(newPath.slice("rename to ".length)), + }; +} + +/** Return a path with the expected Git side prefix while avoiding double-prefixing. */ +function withGitPrefix(path: string, prefix: "a/" | "b/") { + return path.startsWith(prefix) ? path : `${prefix}${path}`; +} + +/** Decide whether a mnemonic-looking path pair is real mnemonic output or a noprefix rename. */ +function shouldStripMnemonicPair(oldPath: string, newPath: string, blockLines: string[]) { + const oldMnemonic = splitGitMnemonicPrefix(oldPath); + const newMnemonic = splitGitMnemonicPrefix(newPath); + + if (!oldMnemonic || !newMnemonic || oldMnemonic.prefix === newMnemonic.prefix) { + return null; + } + + const rename = findRenameMetadata(blockLines); + if (!rename) { + return true; + } + + if (rename.oldPath === oldPath && rename.newPath === newPath) { + return false; + } + + if (rename.oldPath === oldMnemonic.path && rename.newPath === newMnemonic.path) { + return true; + } + + return true; +} + +/** Convert already-prefixed or mnemonic-prefixed path pairs into Pierre's canonical shape. */ +function canonicalizeKnownGitPathPair(oldPath: string, newPath: string, blockLines: string[]) { + const oldMnemonic = splitGitMnemonicPrefix(oldPath); + const newMnemonic = splitGitMnemonicPrefix(newPath); + const isCanonical = oldPath.startsWith("a/") && newPath.startsWith("b/"); + + if (isCanonical) { + return { oldPath, newPath, rewriteMode: "add" as const, changed: false, isCanonical: true }; + } + + if (oldMnemonic && newMnemonic && shouldStripMnemonicPair(oldPath, newPath, blockLines)) { + return { + oldPath: `a/${oldMnemonic.path}`, + newPath: `b/${newMnemonic.path}`, + rewriteMode: "strip" as const, + changed: true, + isCanonical: false, + }; + } + + return null; +} + +/** Convert one quoted `diff --git` path pair into Pierre's canonical side-prefix shape. */ +function canonicalizeGitPathPair(oldPath: string, newPath: string, blockLines: string[]) { + return ( + canonicalizeKnownGitPathPair(oldPath, newPath, blockLines) ?? { + oldPath: withGitPrefix(oldPath, "a/"), + newPath: withGitPrefix(newPath, "b/"), + rewriteMode: "add" as const, + changed: true, + isCanonical: false, + } + ); +} + +/** Insert the canonical `a/` or `b/` prefix on a unified-diff header that is missing it. */ +function rewriteUnifiedFileLine( + line: string, + marker: "--- " | "+++ ", + prefix: "a/" | "b/", + mode: GitHeaderRewriteMode, +) { + const path = line.slice(marker.length); + const quotedPath = path.match(/^"((?:\\.|[^"\\])*)"(.*)$/); + const pathName = quotedPath?.[1] ?? path; + const suffix = quotedPath?.[2] ?? ""; + + if (pathName === "/dev/null" || pathName.startsWith("/dev/null\t")) { + return line; + } + + const normalizedPath = + mode === "strip" ? (splitGitMnemonicPrefix(pathName)?.path ?? pathName) : pathName; + + return `${marker}${withGitPrefix(normalizedPath, prefix)}${suffix}`; +} diff --git a/src/core/loaders.gitLog.test.ts b/src/core/patch/gitLog.test.ts similarity index 99% rename from src/core/loaders.gitLog.test.ts rename to src/core/patch/gitLog.test.ts index 228407b3..86d9856b 100644 --- a/src/core/loaders.gitLog.test.ts +++ b/src/core/patch/gitLog.test.ts @@ -1,6 +1,6 @@ import { describe, expect, test } from "bun:test"; import { parsePatchFiles } from "@pierre/diffs"; -import { stripGitLogMetadata } from "./loaders"; +import { stripGitLogMetadata } from "./gitLog"; describe("stripGitLogMetadata", () => { test("returns input unchanged when no commit boundary is present", () => { diff --git a/src/core/patch/gitLog.ts b/src/core/patch/gitLog.ts new file mode 100644 index 00000000..a21c7b53 --- /dev/null +++ b/src/core/patch/gitLog.ts @@ -0,0 +1,57 @@ +/** + * Strip `git log -p` / `git show -p` commit metadata so the surviving text + * is a plain patch stream that `@pierre/diffs` can parse without spamming + * `parseLineType: Invalid firstChar` warnings on every commit boundary. + * + * Each commit in `git log -p` looks like: + * + * ``` + * commit [ (refs)] + * Author: ... + * Date: ... + * + * + * + * diff --git a/foo b/foo + * ... + * ``` + * + * Lines from `commit ` through the first patch header (`diff --git `, + * `--- `, or `+++ `) are dropped. Hunk-body lines always start with + * `+`, `-`, ` ` or `\`, so a real context line that begins with the word + * "commit" is unaffected (its leading space prevents the regex match). + * + * Returns the input unchanged when no `commit ` boundary is present, + * keeping the regular patch path zero-cost. + */ +export function stripGitLogMetadata(text: string) { + // Hex range up to 64 covers both SHA-1 (40) and SHA-256 (64) repos. + const COMMIT_BOUNDARY = /^commit [0-9a-f]{4,64}(?: |$)/m; + if (!COMMIT_BOUNDARY.test(text)) { + return text; + } + + const lines = text.split("\n"); + const out: string[] = []; + let inHeader = false; + + for (const line of lines) { + if (COMMIT_BOUNDARY.test(line)) { + inHeader = true; + continue; + } + if (inHeader) { + // The header section ends at the first patch line. `diff --git ` + // is the canonical Git start; `--- `/`+++ ` cover unified-diff + // input where someone synthesised log output without it. + if (line.startsWith("diff --git ") || line.startsWith("--- ") || line.startsWith("+++ ")) { + inHeader = false; + out.push(line); + } + continue; + } + out.push(line); + } + + return out.join("\n"); +} diff --git a/src/core/patch/normalize.ts b/src/core/patch/normalize.ts new file mode 100644 index 00000000..07f95c49 --- /dev/null +++ b/src/core/patch/normalize.ts @@ -0,0 +1,18 @@ +import { normalizeGitPatchPrefixes } from "./gitFormat"; +import { stripGitLogMetadata } from "./gitLog"; + +/** Remove terminal escape sequences so Git-colored pager input still parses as plain patch text. */ +export function stripTerminalControl(text: string) { + return text + .replace(/\x1bP[\s\S]*?\x1b\\/g, "") + .replace(/\x1b\][\s\S]*?(?:\x07|\x1b\\)/g, "") + .replace(/\x1b\[[0-?]*[ -/]*[@-~]/g, "") + .replace(/\x1b[@-_]/g, ""); +} + +/** Normalize patch text into the parser-friendly form used throughout Hunk. */ +export function normalizePatchText(patchText: string) { + return normalizeGitPatchPrefixes( + stripGitLogMetadata(stripTerminalControl(patchText.replaceAll("\r\n", "\n"))), + ); +} diff --git a/src/core/vcs/git.ts b/src/core/vcs/git.ts index 3780b907..20f72841 100644 --- a/src/core/vcs/git.ts +++ b/src/core/vcs/git.ts @@ -1,6 +1,5 @@ import fs from "node:fs"; import { dirname, join, resolve } from "node:path"; -import type { FileDiffMetadata } from "@pierre/diffs"; import { buildGitDiffArgs, buildGitDiffNumstatArgs, @@ -10,6 +9,7 @@ import { resolveGitRepoRoot, runGitText, } from "../git"; +import { createSkippedLargeMetadata } from "../diffFile"; import type { DiffFile } from "../types"; import type { VcsAdapter } from "./types"; @@ -76,21 +76,6 @@ function shouldSkipLargeTrackedDiff(file: GitNumstatFile, repoRoot: string) { } } -/** Build placeholder metadata for a file whose full diff would be too expensive. */ -function createSkippedLargeMetadata(filePath: string): FileDiffMetadata { - return { - name: filePath, - type: "change", - hunks: [], - splitLineCount: 0, - unifiedLineCount: 0, - isPartial: true, - additionLines: [], - deletionLines: [], - cacheKey: `${filePath}:large-diff-skipped`, - }; -} - /** Build a tracked placeholder for a file whose diff would be too expensive to render. */ function buildSkippedLargeTrackedDiffFile( file: GitNumstatFile, @@ -102,7 +87,7 @@ function buildSkippedLargeTrackedDiffFile( path: file.path, patch: "", stats: { additions: file.additions, deletions: file.deletions }, - metadata: createSkippedLargeMetadata(file.path), + metadata: createSkippedLargeMetadata(file.path, "change"), agent: null, isTooLarge: true, }; diff --git a/src/opentui/HunkDiffView.test.tsx b/src/opentui/HunkDiffView.test.tsx index 80e7adad..14628eae 100644 --- a/src/opentui/HunkDiffView.test.tsx +++ b/src/opentui/HunkDiffView.test.tsx @@ -140,6 +140,20 @@ describe("OpenTUI public components", () => { expect(files[0]?.patch).toContain("diff --git a/example.ts b/example.ts"); }); + test("normalizes noprefix patch text for public file models", () => { + const files = createHunkDiffFilesFromPatch(`diff --git example.ts example.ts +--- example.ts ++++ example.ts +@@ -1 +1 @@ +-before ++after +`); + + expect(files).toHaveLength(1); + expect(files[0]?.path).toBe("example.ts"); + expect(files[0]?.patch).toContain("diff --git a/example.ts b/example.ts"); + }); + test("exports the documented built-in theme names", () => { expect(HUNK_DIFF_THEME_NAMES).toEqual(["graphite", "midnight", "paper", "ember"]); }); diff --git a/src/opentui/model.ts b/src/opentui/model.ts index 0cd8e720..d7396204 100644 --- a/src/opentui/model.ts +++ b/src/opentui/model.ts @@ -1,82 +1,16 @@ -import { parsePatchFiles, type FileDiffMetadata } from "@pierre/diffs"; +import { parsePatchFiles } from "@pierre/diffs"; import { patchLooksBinary } from "../core/binary"; import { normalizeDiffMetadataPaths, normalizeDiffPath } from "../core/diffPaths"; +import { countDiffStats } from "../core/diffFile"; +import { splitPatchIntoFileChunks, findPatchChunk } from "../core/patch/chunks"; +import { normalizePatchText } from "../core/patch/normalize"; import type { DiffFile } from "../core/types"; import type { HunkDiffFile, HunkDiffFileInput } from "./types"; const NORMALIZED_HUNK_DIFF_FILES = new WeakSet(); -/** Split a patch stream into per-file chunks for public OpenTUI file helpers. */ -function splitPatchIntoFileChunks(rawPatch: string) { - const patch = rawPatch.replaceAll("\r\n", "\n"); - const lines = patch.split("\n"); - const chunks: string[] = []; - let current: string[] = []; - const hasGitHeaders = lines.some((line) => line.startsWith("diff --git ")); - - const flush = () => { - if (current.length > 0) { - chunks.push(`${current.join("\n").trimEnd()}\n`); - current = []; - } - }; - - for (let index = 0; index < lines.length; index += 1) { - const line = lines[index]!; - - if (hasGitHeaders && line.startsWith("diff --git ")) { - flush(); - current.push(line); - continue; - } - - if (!hasGitHeaders && line.startsWith("--- ") && lines[index + 1]?.startsWith("+++ ")) { - flush(); - current.push(line); - current.push(lines[index + 1]!); - index += 1; - continue; - } - - if (current.length > 0) { - current.push(line); - } - } - - flush(); - return chunks; -} - -/** Find the original per-file patch chunk for parsed metadata. */ -function findPatchChunk(metadata: FileDiffMetadata, chunks: string[], index: number) { - const byIndex = chunks[index]; - if (byIndex) { - return byIndex; - } - - const paths = [metadata.name, metadata.prevName] - .map(normalizeDiffPath) - .filter((value): value is string => Boolean(value)); - - return chunks.find((chunk) => paths.some((path) => chunk.includes(path))) ?? ""; -} - /** Count visible additions and deletions from Pierre metadata. */ -export function countHunkDiffStats(metadata: FileDiffMetadata) { - let additions = 0; - let deletions = 0; - - for (const hunk of metadata.hunks) { - for (const content of hunk.hunkContent) { - if (content.type === "change") { - additions += content.additions; - deletions += content.deletions; - } - } - } - - return { additions, deletions }; -} +export const countHunkDiffStats = countDiffStats; /** Build Hunk's public OpenTUI file model with normalized paths and default stats. */ export function createHunkDiffFile(input: HunkDiffFileInput): HunkDiffFile { @@ -128,9 +62,10 @@ export function toInternalDiffFile(diff: HunkDiffFileInput): DiffFile { /** Parse unified diff text into Hunk's public OpenTUI file model. */ export function createHunkDiffFilesFromPatch(patchText: string, sourceId = "patch") { - const chunks = splitPatchIntoFileChunks(patchText); + const normalizedPatchText = normalizePatchText(patchText); + const chunks = splitPatchIntoFileChunks(normalizedPatchText); - return parsePatchFiles(patchText, sourceId, true) + return parsePatchFiles(normalizedPatchText, sourceId, true) .flatMap((entry) => entry.files) .map((metadata, index) => createHunkDiffFile({