From 5489e4c5a2581de65bb9d67f83da3e10f2f515b9 Mon Sep 17 00:00:00 2001
From: FinleyGe <m13203533462@163.com>
Date: Tue, 11 Nov 2025 15:38:04 +0800
Subject: [PATCH] perf: docDiff

---
 .../packages/docDiff/src/diffAlgorithm.ts     | 550 ++++++++++++++++++
 modules/tool/packages/docDiff/src/index.ts    | 207 ++-----
 .../packages/docDiff/src/textNormalizer.ts    | 449 ++++++++++++++
 .../docDiff/test/diffAlgorithm.test.ts        | 398 +++++++++++++
 .../test/integration-tolerance.test.ts        |  91 +++
 .../packages/docDiff/test/integration.test.ts |  70 +++
 .../docDiff/test/lineBreakTolerance.test.ts   | 393 +++++++++++++
 .../docDiff/test/lineTolerance.test.ts        | 378 ++++++++++++
 8 files changed, 2363 insertions(+), 173 deletions(-)
 create mode 100644 modules/tool/packages/docDiff/src/diffAlgorithm.ts
 create mode 100644 modules/tool/packages/docDiff/src/textNormalizer.ts
 create mode 100644 modules/tool/packages/docDiff/test/diffAlgorithm.test.ts
 create mode 100644 modules/tool/packages/docDiff/test/integration-tolerance.test.ts
 create mode 100644 modules/tool/packages/docDiff/test/integration.test.ts
 create mode 100644 modules/tool/packages/docDiff/test/lineBreakTolerance.test.ts
 create mode 100644 modules/tool/packages/docDiff/test/lineTolerance.test.ts

diff --git a/modules/tool/packages/docDiff/src/diffAlgorithm.ts b/modules/tool/packages/docDiff/src/diffAlgorithm.ts
new file mode 100644
index 00000000..f1d27303
--- /dev/null
+++ b/modules/tool/packages/docDiff/src/diffAlgorithm.ts
@@ -0,0 +1,550 @@
+// Options for line-break tolerance
+export interface LineBreakToleranceOptions {
+  /** Whether line-break tolerance logic is enabled */
+  enableLineBreakTolerance?: boolean;
+  /** Scan range (number of lines) */
+  scanRange?: number;
+  /** Tolerance threshold */
+  toleranceThreshold?: number;
+}
+
+// Paragraph (line) diff types
+export type DiffType = 'unchanged' | 'added' | 'removed' | 'modified';
+
+export interface ParagraphDiff {
+  type: DiffType;
+  original?: string;
+  modified?: string;
+  lineNumber?: number;
+}
+
+// Split a document into lines on '\n'
+export function splitIntoLines(text: string): string[] {
+  return text.split('\n');
+}
+
+// Similarity of two segments (sensitive variant): 1 - editDistance / maxLength
+export function calculateSimilarity(text1: string, text2: string): number {
+  // Identical strings: return 1.0 directly
+  if (text1 === text2) return 1.0;
+
+  // Compute the edit distance
+  const distance = levenshteinDistance(text1, text2);
+  const maxLength = Math.max(text1.length, text2.length);
+
+  if (maxLength === 0) return 1.0;
+
+  // Convert to a similarity between 0 and 1
+  const similarity = 1 - distance / maxLength;
+
+  return similarity;
+}
+
+// Levenshtein edit distance between str1 and str2, keeping only two rolling rows in memory
+function levenshteinDistance(str1: string, str2: string): number {
+  let previous: number[] = [];
+
+  for (let j = 0; j <= str1.length; j++) {
+    previous[j] = j;
+  }
+
+  for (let i = 1; i <= str2.length; i++) {
+    // current[0]: an empty prefix of str1 versus the first i characters of str2 costs i edits
+    const current: number[] = [i];
+
+    for (let j = 1; j <= str1.length; j++) {
+      if (str2.charAt(i - 1) === str1.charAt(j - 1)) {
+        current[j] = previous[j - 1];
+      } else {
+        current[j] = Math.min(
+          previous[j - 1] + 1, // substitution
+          current[j - 1] + 1, // insertion
+          previous[j] + 1 // deletion
+        );
+      }
+    }
+    previous = current;
+  }
+
+  return previous[str1.length];
+}
+
+// Whether the similarity is high (the pair should be treated as a modification)
+export function isHighSimilarity(similarity: number): boolean {
+  // Similarity > 0.7 and < 1.0 counts as high similarity and should be marked as modified
+  return similarity > 0.7 && similarity < 1.0;
+}
+
+// Whether the similarity is medium or better (considered a match in the LCS)
+export function isMediumSimilarity(similarity: number): boolean {
+  // Raised threshold: only similarity > 0.7 is considered a potential match in the LCS
+  return similarity > 0.7;
+}
+
+// Strict content comparison: true only for exactly identical lines
+export function isExactMatch(text1: string, text2: string): boolean {
+  return text1 === text2;
+}
+
+// Find an exactly matching line; matchIndex is an offset from startModIndex (check `found` first)
+export function findExactMatch(
+  originalLine: string,
+  modifiedLines: string[],
+  startModIndex: number,
+  searchRange: number = 20
+): { matchIndex: number; found: boolean } {
+  // Search forward (increasing index, starting at startModIndex itself) for an exact match
+  for (let i = 0; i < Math.min(searchRange, modifiedLines.length - startModIndex); i++) {
+    if (isExactMatch(originalLine, modifiedLines[startModIndex + i])) {
+      return { matchIndex: i, found: true };
+    }
+  }
+
+  // Search backward (decreasing index) for an exact match, when possible
+  for (let i = 1; i <= Math.min(searchRange, startModIndex); i++) {
+    if (isExactMatch(originalLine, modifiedLines[startModIndex - i])) {
+      return { matchIndex: -i, found: true }; // negative offset means a backward match
+    }
+  }
+
+  return { matchIndex: -1, found: false };
+}
+
+// Build the similarity-based match matrix (used by the LCS algorithm)
+export function buildMatchMatrix(originalLines: string[], modifiedLines: string[]): number[][] {
+  const matrix: number[][] = [];
+
+  for (let i = 0; i <= originalLines.length; i++) {
+    matrix[i] = [];
+    for (let j = 0; j <= modifiedLines.length; j++) {
+      if (i === 0 || j === 0) {
+        matrix[i][j] = 0;
+      } else {
+        const similarity = calculateSimilarity(originalLines[i - 1], modifiedLines[j - 1]);
+        // Medium similarity or better counts as a potential match
+        if (isMediumSimilarity(similarity)) {
+          matrix[i][j] = matrix[i - 1][j - 1] + 1;
+        } else {
+          matrix[i][j] = Math.max(matrix[i - 1][j], matrix[i][j - 1]);
+        }
+      }
+    }
+  }
+
+  return matrix;
+}
+
+// Backtrack through the similarity match matrix to recover the matched line pairs
+export function backtrackLCS(
+  matrix: number[][],
+  originalLines: string[],
+  modifiedLines: string[]
+): { origIndices: number[]; modIndices: number[] } {
+  const origIndices: number[] = [];
+  const modIndices: number[] = [];
+
+  let i = originalLines.length;
+  let j = modifiedLines.length;
+
+  while (i > 0 && j > 0) {
+    const similarity = calculateSimilarity(originalLines[i - 1], modifiedLines[j - 1]);
+
+    // Only a pair with medium-or-better similarity that lies on the match path counts as a match
+    if (isMediumSimilarity(similarity) && matrix[i][j] === matrix[i - 1][j - 1] + 1) {
+      // Found a similarity match
+      origIndices.unshift(i - 1);
+      modIndices.unshift(j - 1);
+      i--;
+      j--;
+    } else if (matrix[i - 1][j] >= matrix[i][j - 1]) {
+      i--;
+    } else {
+      j--;
+    }
+  }
+
+  return { origIndices, modIndices };
+}
+
+// Sensitive document comparison: high similarity => modified; low similarity => removed + added
+export function compareDocuments(originalText: string, modifiedText: string): ParagraphDiff[] {
+  const originalLines = splitIntoLines(originalText);
+  const modifiedLines = splitIntoLines(modifiedText);
+
+  const diffs: ParagraphDiff[] = [];
+  let currentLineNumber = 1;
+
+  // Use the similarity-based LCS algorithm to find potentially matching lines
+  const matrix = buildMatchMatrix(originalLines, modifiedLines);
+  const { origIndices, modIndices } = backtrackLCS(matrix, originalLines, modifiedLines);
+
+  // Append a virtual end index so the trailing region is handled by the same loop
+  origIndices.push(originalLines.length);
+  modIndices.push(modifiedLines.length);
+
+  let origIndex = 0;
+  let modIndex = 0;
+
+  // Process the differences between consecutive matches
+  for (let matchIndex = 0; matchIndex < origIndices.length; matchIndex++) {
+    const matchOrigIndex = origIndices[matchIndex];
+    const matchModIndex = modIndices[matchIndex];
+
+    // Process the differing region that precedes the current match
+    while (origIndex < matchOrigIndex || modIndex < matchModIndex) {
+      // The original document has no lines left in this region
+      if (origIndex >= matchOrigIndex) {
+        // Everything remaining on the modified side is an added line
+        while (modIndex < matchModIndex) {
+          const modifiedLine = modifiedLines[modIndex];
+          diffs.push({
+            type: 'added',
+            modified: modifiedLine,
+            lineNumber: currentLineNumber++
+          });
+          modIndex++;
+        }
+        break;
+      }
+
+      // The modified document has no lines left in this region
+      if (modIndex >= matchModIndex) {
+        // Everything remaining on the original side is a removed line
+        while (origIndex < matchOrigIndex) {
+          const originalLine = originalLines[origIndex];
+          diffs.push({
+            type: 'removed',
+            original: originalLine,
+            lineNumber: currentLineNumber++
+          });
+          origIndex++;
+        }
+        break;
+      }
+
+      const originalLine = originalLines[origIndex];
+      const modifiedLine = modifiedLines[modIndex];
+
+      // Compute the similarity
+      const similarity = calculateSimilarity(originalLine, modifiedLine);
+
+      if (isHighSimilarity(similarity)) {
+        // High similarity: treat as a modification
+        diffs.push({
+          type: 'modified',
+          original: originalLine,
+          modified: modifiedLine,
+          lineNumber: currentLineNumber++
+        });
+        origIndex++;
+        modIndex++;
+      } else {
+        // Low similarity: emit a removal and an addition separately
+        diffs.push({
+          type: 'removed',
+          original: originalLine,
+          lineNumber: currentLineNumber++
+        });
+        diffs.push({
+          type: 'added',
+          modified: modifiedLine,
+          lineNumber: currentLineNumber++
+        });
+        origIndex++;
+        modIndex++;
+      }
+    }
+
+    // Emit the matched line pair
+    if (matchIndex < origIndices.length - 1) {
+      // Only when this is not the virtual end index
+      const originalLine = originalLines[matchOrigIndex];
+      const modifiedLine = modifiedLines[matchModIndex];
+      const similarity = calculateSimilarity(originalLine, modifiedLine);
+
+      if (isExactMatch(originalLine, modifiedLine)) {
+        // Exactly identical: unchanged
+        diffs.push({
+          type: 'unchanged',
+          original: originalLine,
+          modified: modifiedLine,
+          lineNumber: currentLineNumber++
+        });
+      } else if (isHighSimilarity(similarity)) {
+        // High similarity: treat as a modification
+        diffs.push({
+          type: 'modified',
+          original: originalLine,
+          modified: modifiedLine,
+          lineNumber: currentLineNumber++
+        });
+      } else {
+        // Medium similarity: unchanged. NOTE(review): looks unreachable, LCS pairs are all > 0.7
+        diffs.push({
+          type: 'unchanged',
+          original: originalLine,
+          modified: modifiedLine,
+          lineNumber: currentLineNumber++
+        });
+      }
+    }
+
+    origIndex = matchOrigIndex + 1;
+    modIndex = matchModIndex + 1;
+  }
+
+  return diffs;
+}
+
+// Line-break tolerant comparison: true when the pair differs only by where lines were broken
+export function compareWithLineBreakTolerance(
+  originalLine: string,
+  modifiedLine: string,
+  originalLines: string[],
+  modifiedLines: string[],
+  origIndex: number,
+  modIndex: number,
+  options: LineBreakToleranceOptions = {}
+): boolean {
+  const { enableLineBreakTolerance = true, scanRange = 3, toleranceThreshold = 0.95 } = options;
+
+  if (!enableLineBreakTolerance) {
+    return false;
+  }
+
+  // Identical lines: return true directly
+  if (originalLine === modifiedLine) {
+    return true;
+  }
+
+  // Scan nearby lines of the original document, merge them, and compare with the modified side
+  for (
+    let i = Math.max(0, origIndex - scanRange);
+    i <= Math.min(originalLines.length - 1, origIndex + scanRange);
+    i++
+  ) {
+    for (
+      let j = Math.max(0, modIndex - scanRange);
+      j <= Math.min(modifiedLines.length - 1, modIndex + scanRange);
+      j++
+    ) {
+      // Skip comparing the current line pair with itself
+      if (i === origIndex && j === modIndex) continue;
+
+      // Merge the lines of the original document between origIndex and i (inclusive)
+      const originalSegment = originalLines
+        .slice(Math.min(origIndex, i), Math.max(origIndex, i) + 1)
+        .join('')
+        .replace(/\s+/g, '') // strip all whitespace
+        .toLowerCase();
+
+      // Merge the lines of the modified document between modIndex and j (inclusive)
+      const modifiedSegment = modifiedLines
+        .slice(Math.min(modIndex, j), Math.max(modIndex, j) + 1)
+        .join('')
+        .replace(/\s+/g, '') // strip all whitespace
+        .toLowerCase();
+
+      // Identical non-empty merged content means the difference is only in line breaks
+      if (originalSegment === modifiedSegment && originalSegment.length > 0) {
+        return true;
+      }
+
+      // Merged content whose similarity reaches the threshold is also tolerated
+      const similarity = calculateSimilarity(originalSegment, modifiedSegment);
+      if (similarity >= toleranceThreshold) {
+        return true;
+      }
+    }
+  }
+
+  // Extra check: scan up to 2 neighbouring lines with the line breaks removed
+  for (
+    let i = Math.max(0, origIndex - 2);
+    i <= Math.min(originalLines.length - 1, origIndex + 2);
+    i++
+  ) {
+    for (
+      let j = Math.max(0, modIndex - 2);
+      j <= Math.min(modifiedLines.length - 1, modIndex + 2);
+      j++
+    ) {
+      // Skip the current line pair itself (already handled above)
+      if (i === origIndex && j === modIndex) continue;
+
+      // Check multi-line combinations with the line breaks removed
+      const origSegment = originalLines
+        .slice(Math.min(origIndex, i), Math.max(origIndex, i) + 1)
+        .join('') // drop line breaks
+        .replace(/\s+/g, '') // strip all whitespace
+        .toLowerCase();
+
+      const modSegment = modifiedLines
+        .slice(Math.min(modIndex, j), Math.max(modIndex, j) + 1)
+        .join('') // drop line breaks
+        .replace(/\s+/g, '') // strip all whitespace
+        .toLowerCase();
+
+      if (origSegment === modSegment && origSegment.length > 0) {
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+// Document comparison with line-break tolerance
+export function compareDocumentsWithTolerance(
+  originalText: string,
+  modifiedText: string,
+  toleranceOptions?: LineBreakToleranceOptions
+): ParagraphDiff[] {
+  const originalLines = splitIntoLines(originalText);
+  const modifiedLines = splitIntoLines(modifiedText);
+
+  const diffs: ParagraphDiff[] = [];
+  let currentLineNumber = 1;
+
+  // Use the similarity-based LCS algorithm to find potentially matching lines
+  const matrix = buildMatchMatrix(originalLines, modifiedLines);
+  const { origIndices, modIndices } = backtrackLCS(matrix, originalLines, modifiedLines);
+
+  // Append a virtual end index so the trailing region is handled by the same loop
+  origIndices.push(originalLines.length);
+  modIndices.push(modifiedLines.length);
+
+  let origIndex = 0;
+  let modIndex = 0;
+
+  // Process the differences between consecutive matches
+  for (let matchIndex = 0; matchIndex < origIndices.length; matchIndex++) {
+    const matchOrigIndex = origIndices[matchIndex];
+    const matchModIndex = modIndices[matchIndex];
+
+    // Process the differing region that precedes the current match
+    while (origIndex < matchOrigIndex || modIndex < matchModIndex) {
+      // The original document has no lines left in this region
+      if (origIndex >= matchOrigIndex) {
+        // Everything remaining on the modified side is an added line
+        while (modIndex < matchModIndex) {
+          const modifiedLine = modifiedLines[modIndex];
+          diffs.push({
+            type: 'added',
+            modified: modifiedLine,
+            lineNumber: currentLineNumber++
+          });
+          modIndex++;
+        }
+        break;
+      }
+
+      // The modified document has no lines left in this region
+      if (modIndex >= matchModIndex) {
+        // Everything remaining on the original side is a removed line
+        while (origIndex < matchOrigIndex) {
+          const originalLine = originalLines[origIndex];
+          diffs.push({
+            type: 'removed',
+            original: originalLine,
+            lineNumber: currentLineNumber++
+          });
+          origIndex++;
+        }
+        break;
+      }
+
+      const originalLine = originalLines[origIndex];
+      const modifiedLine = modifiedLines[modIndex];
+
+      // Compute the similarity
+      const similarity = calculateSimilarity(originalLine, modifiedLine);
+
+      // Check line-break tolerance first
+      if (
+        compareWithLineBreakTolerance(
+          originalLine,
+          modifiedLine,
+          originalLines,
+          modifiedLines,
+          origIndex,
+          modIndex,
+          toleranceOptions
+        )
+      ) {
+        // Line-break tolerance matched: treat as unchanged
+        diffs.push({
+          type: 'unchanged',
+          original: originalLine,
+          modified: modifiedLine,
+          lineNumber: currentLineNumber++
+        });
+        origIndex++;
+        modIndex++;
+      } else if (isHighSimilarity(similarity)) {
+        // High similarity: treat as a modification
+        diffs.push({
+          type: 'modified',
+          original: originalLine,
+          modified: modifiedLine,
+          lineNumber: currentLineNumber++
+        });
+        origIndex++;
+        modIndex++;
+      } else {
+        // Low similarity: emit a removal and an addition separately
+        diffs.push({
+          type: 'removed',
+          original: originalLine,
+          lineNumber: currentLineNumber++
+        });
+        diffs.push({
+          type: 'added',
+          modified: modifiedLine,
+          lineNumber: currentLineNumber++
+        });
+        origIndex++;
+        modIndex++;
+      }
+    }
+
+    // Emit the matched line pair
+    if (matchIndex < origIndices.length - 1) {
+      // Only when this is not the virtual end index
+      const originalLine = originalLines[matchOrigIndex];
+      const modifiedLine = modifiedLines[matchModIndex];
+      const similarity = calculateSimilarity(originalLine, modifiedLine);
+
+      if (similarity >= 1.0) {
+        // Exactly identical: unchanged
+        diffs.push({
+          type: 'unchanged',
+          original: originalLine,
+          modified: modifiedLine,
+          lineNumber: currentLineNumber++
+        });
+      } else if (isHighSimilarity(similarity)) {
+        // High similarity: treat as a modification
+        diffs.push({
+          type: 'modified',
+          original: originalLine,
+          modified: modifiedLine,
+          lineNumber: currentLineNumber++
+        });
+      } else {
+        // Medium similarity: unchanged. NOTE(review): looks unreachable, LCS pairs are all > 0.7
+        diffs.push({
+          type: 'unchanged',
+          original: originalLine,
+          modified: modifiedLine,
+          lineNumber: currentLineNumber++
+        });
+      }
+    }
+
+    origIndex = matchOrigIndex + 1;
+    modIndex = matchModIndex + 1;
+  }
+
+  return diffs;
+}
diff --git a/modules/tool/packages/docDiff/src/index.ts b/modules/tool/packages/docDiff/src/index.ts
index bb7d9159..b733d771 100644
--- a/modules/tool/packages/docDiff/src/index.ts
+++ b/modules/tool/packages/docDiff/src/index.ts
@@ -1,10 +1,25 @@
 import { uploadFile } from '@tool/utils/uploadFile';
 import { z } from 'zod';
+import {
+  compareDocuments,
+  compareDocumentsWithTolerance,
+  type ParagraphDiff,
+  type LineBreakToleranceOptions
+} from './diffAlgorithm';
+import { applyFullNormalization } from './textNormalizer';
 
 export const InputType = z.object({
   originalText: z.string().min(1, '原始文档内容不能为空'),
   modifiedText: z.string().min(1, '修改后文档内容不能为空'),
-  title: z.string().optional().default('文档对比报告')
+  title: z.string().optional().default('文档对比报告'),
+  // 换行容差选项
+  lineTolerance: z
+    .object({
+      enableLineBreakTolerance: z.boolean().optional().default(true),
+      scanRange: z.number().optional().default(3),
+      toleranceThreshold: z.number().optional().default(0.95)
+    })
+    .optional()
 });
 
 export const OutputType = z.object({
@@ -24,6 +39,8 @@ export type InputType = {
   originalText: string;
   modifiedText: string;
   title?: string;
+  // 换行容差选项
+  lineTolerance?: LineBreakToleranceOptions;
 };
 
 // 输出类型
@@ -37,177 +54,6 @@ export type OutputType = {
   }[];
 };
 
-// 定义段落差异类型
-type DiffType = 'unchanged' | 'added' | 'removed' | 'modified';
-
-interface ParagraphDiff {
-  type: DiffType;
-  original?: string;
-  modified?: string;
-  lineNumber?: number;
-}
-
-// 分割文档为行
-function splitIntoLines(text: string): string[] {
-  return text.split('\n');
-}
-
-// 计算两个段的相似度
-function calculateSimilarity(text1: string, text2: string): number {
-  // 移除首尾空白字符
-  const clean1 = text1.trim();
-  const clean2 = text2.trim();
-
-  // 如果两行都为空，则完全相同
-  if (!clean1 && !clean2) return 1.0;
-  if (!clean1 || !clean2) return 0.0;
-
-  // 如果内容完全相同，直接返回1.0
-  if (clean1 === clean2) return 1.0;
-
-  // 移除所有空白字符并转换为小写进行比较
-  const chars1 = clean1.replace(/\s+/g, '').toLowerCase();
-  const chars2 = clean2.replace(/\s+/g, '').toLowerCase();
-
-  const longer = chars1.length > chars2.length ? chars1 : chars2;
-  const shorter = chars1.length > chars2.length ? chars2 : chars1;
-
-  if (longer.length === 0) return 1.0;
-
-  const matches = Array.from(longer).filter(
-    (char, index) => index < shorter.length && char === shorter[index]
-  ).length;
-
-  return matches / longer.length;
-}
-
-// 对比两个文档
-function compareDocuments(originalText: string, modifiedText: string): ParagraphDiff[] {
-  const originalLines = splitIntoLines(originalText);
-  const modifiedLines = splitIntoLines(modifiedText);
-
-  const diffs: ParagraphDiff[] = [];
-  let origIndex = 0;
-  let modIndex = 0;
-  let currentLineNumber = 1; // 使用连续的行号
-
-  while (origIndex < originalLines.length || modIndex < modifiedLines.length) {
-    const originalLine = originalLines[origIndex] || '';
-    const modifiedLine = modifiedLines[modIndex] || '';
-
-    // 如果其中一个文档已经处理完毕
-    if (origIndex >= originalLines.length) {
-      // 只有修改后的文档有内容，这是新增行
-      if (modifiedLine.trim()) {
-        // 只添加非空行
-        diffs.push({
-          type: 'added',
-          modified: modifiedLine,
-          lineNumber: currentLineNumber++
-        });
-      }
-      modIndex++;
-      continue;
-    }
-
-    if (modIndex >= modifiedLines.length) {
-      // 只有原始文档有内容，这是删除行
-      if (originalLine.trim()) {
-        // 只添加非空行
-        diffs.push({
-          type: 'removed',
-          original: originalLine,
-          lineNumber: currentLineNumber++
-        });
-      }
-      origIndex++;
-      continue;
-    }
-
-    // 如果两行都是空的，跳过
-    if (!originalLine.trim() && !modifiedLine.trim()) {
-      origIndex++;
-      modIndex++;
-      continue;
-    }
-
-    // 计算行相似度
-    const similarity = calculateSimilarity(originalLine, modifiedLine);
-
-    if (similarity > 0.9) {
-      // 完全相同的行，标记为unchanged
-      diffs.push({
-        type: 'unchanged',
-        original: originalLine,
-        modified: modifiedLine,
-        lineNumber: currentLineNumber++
-      });
-      origIndex++;
-      modIndex++;
-    } else if (similarity > 0.8) {
-      // 修改的行
-      diffs.push({
-        type: 'modified',
-        original: originalLine,
-        modified: modifiedLine,
-        lineNumber: currentLineNumber++
-      });
-      origIndex++;
-      modIndex++;
-    } else {
-      // 寻找最佳匹配
-      let bestMatchIndex = -1;
-      let bestSimilarity = 0;
-
-      for (let i = 0; i < Math.min(3, modifiedLines.length - modIndex); i++) {
-        const candidateSimilarity = calculateSimilarity(originalLine, modifiedLines[modIndex + i]);
-        if (candidateSimilarity > bestSimilarity) {
-          bestSimilarity = candidateSimilarity;
-          bestMatchIndex = i;
-        }
-      }
-
-      if (bestSimilarity > 0.6) {
-        // 找到匹配，先添加新增的行
-        for (let i = 0; i < bestMatchIndex; i++) {
-          const addedLine = modifiedLines[modIndex + i];
-          if (addedLine.trim()) {
-            // 只添加非空行
-            diffs.push({
-              type: 'added',
-              modified: addedLine,
-              lineNumber: currentLineNumber++
-            });
-          }
-        }
-
-        // 添加修改的行
-        diffs.push({
-          type: 'modified',
-          original: originalLine,
-          modified: modifiedLines[modIndex + bestMatchIndex],
-          lineNumber: currentLineNumber++
-        });
-        modIndex += bestMatchIndex + 1;
-        origIndex++;
-      } else {
-        // 没有找到匹配，可能是删除
-        if (originalLine.trim()) {
-          // 只添加非空行
-          diffs.push({
-            type: 'removed',
-            original: originalLine,
-            lineNumber: currentLineNumber++
-          });
-        }
-        origIndex++;
-      }
-    }
-  }
-
-  return diffs;
-}
-
 // 生成 HTML 报告
 function generateHtmlReport(diffs: ParagraphDiff[], title: string): string {
   const timestamp = new Date().toLocaleString('zh-CN');
@@ -1291,7 +1137,22 @@ export async function tool(input: z.infer<typeof InputType>) {
   // Zod 会自动验证输入，如果验证失败会抛出错误
   const validatedInput = InputType.parse(input);
 
-  const diffs = compareDocuments(validatedInput.originalText, validatedInput.modifiedText);
+  // 1. 文本标准化预处理（使用默认配置）
+  const normalizedOriginal = applyFullNormalization(validatedInput.originalText);
+  const normalizedModified = applyFullNormalization(validatedInput.modifiedText);
+
+  // 2. 根据是否启用换行容差选择比较函数
+  let diffs: ParagraphDiff[];
+  if (validatedInput.lineTolerance?.enableLineBreakTolerance) {
+    diffs = compareDocumentsWithTolerance(
+      normalizedOriginal,
+      normalizedModified,
+      validatedInput.lineTolerance
+    );
+  } else {
+    diffs = compareDocuments(normalizedOriginal, normalizedModified);
+  }
+
   const html = generateHtmlReport(diffs, validatedInput.title);
 
   const uploadResult = await uploadFile({
diff --git a/modules/tool/packages/docDiff/src/textNormalizer.ts b/modules/tool/packages/docDiff/src/textNormalizer.ts
new file mode 100644
index 00000000..2eb74b3d
--- /dev/null
+++ b/modules/tool/packages/docDiff/src/textNormalizer.ts
@@ -0,0 +1,449 @@
+/**
+ * Text normalization module
+ * Pre-processes text by removing formatting syntax and extra spaces
+ */
+
+interface NormalizationOptions {
+  /** Whether to remove Markdown formatting syntax */
+  removeMarkdownFormatting?: boolean;
+  /** Whether to preserve table formatting */
+  preserveTables?: boolean;
+  /** Whether to remove extra spaces inside the text */
+  removeExtraSpaces?: boolean;
+  /** Whether to delete all spaces in the text (more aggressive; includes CJK/Latin boundaries) */
+  removeTextSpaces?: boolean;
+  /** Whether to handle mixed CJK/Latin spacing intelligently (drop boundary spaces only) */
+  removeIntelligentSpaces?: boolean;
+  /** Whether to convert full-width punctuation to half-width */
+  convertPunctuation?: boolean;
+}
+
+/**
+ * Normalize text according to the given options
+ */
+function normalizeText(text: string, options: NormalizationOptions = {}): string {
+  const {
+    removeMarkdownFormatting = true,
+    preserveTables = true,
+    removeExtraSpaces = true,
+    removeTextSpaces = false,
+    removeIntelligentSpaces: enableIntelligentSpaces = false,
+    convertPunctuation = false
+  } = options;
+
+  let result = text;
+
+  // Normalization order:
+  // 1. Full-width to half-width (done first so it does not affect later format detection)
+  if (convertPunctuation) {
+    result = convertFullWidthToHalfWidth(result);
+  }
+
+  // 2. Merge multiple empty lines (before format handling so they do not affect detection)
+  result = mergeMultipleEmptyLines(result);
+
+  // 3. Choose a processing strategy depending on whether tables are preserved
+  if (preserveTables) {
+    // Preserve tables: process line by line, separating table rows from other lines
+    const lines = result.split('\n');
+    const processedLines = lines.map((line) => {
+      if (isTableRow(line)) {
+        // Table row: skip Markdown handling, only process spaces
+        return processTableRow(line, {
+          removeTextSpaces,
+          enableIntelligentSpaces,
+          removeExtraSpaces
+        });
+      } else {
+        // Non-table row: apply the full processing pipeline
+        let processedLine = line;
+
+        // Handle Markdown formatting first
+        if (removeMarkdownFormatting) {
+          processedLine = removeMarkdownFormattingSyntax(processedLine);
+        }
+
+        // Then handle spaces
+        processedLine = processSpaces(processedLine, {
+          removeTextSpaces,
+          enableIntelligentSpaces,
+          removeExtraSpaces
+        });
+
+        return processedLine;
+      }
+    });
+    result = processedLines.join('\n');
+  } else {
+    // Tables not preserved: apply the full pipeline to the whole text at once
+    // 3. Handle Markdown formatting (if enabled)
+    if (removeMarkdownFormatting) {
+      result = removeMarkdownFormattingSyntax(result);
+    }
+
+    // 4. Handle spaces last (so it does not affect format detection)
+    result = processSpaces(result, {
+      removeTextSpaces,
+      enableIntelligentSpaces,
+      removeExtraSpaces
+    });
+  }
+
+  return result;
+}
+
+/**
+ * Merge multiple empty lines
+ * Collapses runs of three or more newlines (two or more empty lines) into one empty line
+ */
+function mergeMultipleEmptyLines(text: string): string {
+  return text.replace(/\n{3,}/g, '\n\n');
+}
+
+/**
+ * Single entry point for space handling; the first enabled option wins
+ */
+function processSpaces(
+  text: string,
+  options: {
+    removeTextSpaces: boolean;
+    enableIntelligentSpaces: boolean;
+    removeExtraSpaces: boolean;
+  }
+): string {
+  const { removeTextSpaces, enableIntelligentSpaces, removeExtraSpaces } = options;
+
+  if (removeTextSpaces) {
+    return removeAllTextSpaces(text);
+  } else if (enableIntelligentSpaces) {
+    return removeIntelligentSpaces(text);
+  } else if (removeExtraSpaces) {
+    return removeExtraWhitespace(text);
+  } else {
+    return text;
+  }
+}
+
+/**
+ * Handle spaces in a table row while keeping the table structure
+ */
+function processTableRow(
+  line: string,
+  options: {
+    removeTextSpaces: boolean;
+    enableIntelligentSpaces: boolean;
+    removeExtraSpaces: boolean;
+  }
+): string {
+  const { removeTextSpaces, enableIntelligentSpaces, removeExtraSpaces } = options;
+
+  if (removeTextSpaces) {
+    // Delete all whitespace, keeping the table separators
+    return line
+      .replace(/\s+/g, '') // delete all whitespace
+      .replace(/\|\|/g, '|') // collapse separators that may have become adjacent
+      .replace(/^\||\|$/g, ''); // drop a leading or trailing separator
+  } else if (enableIntelligentSpaces) {
+    // Handle the spaces inside each table cell intelligently
+    return line
+      .split('|')
+      .map((cell) => {
+        // eslint-disable-next-line no-control-regex
+        const hasNonEnglish = /[^\x00-\x7F]/.test(cell);
+        const isPureEnglish = /^[a-zA-Z0-9\s]*$/.test(cell.trim());
+
+        if (cell.trim() === '') return cell; // empty cell
+
+        if (isPureEnglish) {
+          // ASCII letters/digits only: keep a single space between words
+          return cell.replace(/\s+/g, ' ').trim();
+        } else if (hasNonEnglish) {
+          // Contains non-ASCII (e.g. Chinese) characters: delete all whitespace
+          return cell.replace(/\s+/g, '');
+        } else {
+          // Anything else: collapse extra whitespace
+          return cell.replace(/\s+/g, ' ').trim();
+        }
+      })
+      .join('|');
+  } else if (removeExtraSpaces) {
+    return removeExtraWhitespace(line);
+  } else {
+    return line;
+  }
+}
+
+/**
+ * Decide whether a line is a table row
+ */
+function isTableRow(line: string): boolean {
+  // Characteristics of a table row:
+  // 1. Contains the pipe character |
+  // 2. Starts with | or contains the pattern | | (separator row)
+  // 3. Is not just a single | inside ordinary text
+  const trimmed = line.trim();
+  return (
+    trimmed.includes('|') &&
+    (trimmed.startsWith('|') ||
+      trimmed.includes('| |') ||
+      /^[\s]*\|.*\|[\s]*$/.test(line) ||
+      /^[\s]*\|[\s\-:]+\|[\s]*$/.test(line))
+  );
+}
+
+/**
+ * Remove Markdown formatting syntax (table structure is left alone)
+ */
+function removeMarkdownFormattingSyntax(text: string): string {
+  // 1. Remove heading markers
+  text = text.replace(/^(#{1,6})\s+/gm, '');
+
+  // 2. Remove bold formatting **text**
+  text = text.replace(/\*\*([^*]+)\*\*/g, '$1');
+
+  // 3. Remove italic formatting *text* and _text_
+  text = text.replace(/\*([^*]+)\*/g, '$1');
+  text = text.replace(/_([^_]+)_/g, '$1');
+
+  // 4. Remove strikethrough formatting ~~text~~
+  text = text.replace(/~~([^~]+)~~/g, '$1');
+
+  // 5. Remove inline code formatting `text`
+  text = text.replace(/`([^`]+)`/g, '$1');
+
+  // 6. Remove link formatting [text](url)
+  text = text.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1');
+
+  // 7. Remove blockquote markers > (also strips leading whitespace)
+  text = text.replace(/^[>\s]+/gm, '');
+
+  // 8. Remove list markers (-, *, +, and "1." style numbers)
+  text = text.replace(/^[\s]*[-*+]\s+/gm, '');
+  text = text.replace(/^[\s]*\d+\.\s+/gm, '');
+
+  // 9. Remove code fence markers ``` (NOTE(review): step 5 runs first and may consume backticks)
+  text = text.replace(/```(\w+)?\s*([\s\S]*?)\s*```/g, '$2');
+
+  return text;
+}
+
+/**
+ * Remove extra whitespace
+ */
+function removeExtraWhitespace(text: string): string {
+  // 1. Replace runs of spaces/tabs with a single space
+  text = text.replace(/[ \t]+/g, ' ');
+
+  // 2. Remove spaces/tabs at the start and end of each line
+  text = text.replace(/^[ \t]+|[ \t]+$/gm, '');
+
+  // 3. Remove surplus newlines (keeping at most one empty line)
+  text = text.replace(/\n{3,}/g, '\n\n');
+
+  return text;
+}
+
+/**
+ * Delete all whitespace inside the text (more aggressive handling)
+ * For mixed Chinese/English text this also deletes the spaces between the two scripts
+ */
+function removeAllTextSpaces(text: string): string {
+  // Keep the table structure, but delete whitespace inside table content
+  const lines = text.split('\n');
+  return lines
+    .map((line) => {
+      if (isTableRow(line)) {
+        // Table row: keep the separators, but delete whitespace inside each cell
+        return line
+          .split('|')
+          .map((cell) => {
+            // Delete all whitespace inside the cell while keeping the basic structure
+            return cell.replace(/\s+/g, '');
+          })
+          .join('|');
+      } else {
+        // Ordinary line: delete all whitespace (including between Chinese and English)
+        return line.replace(/\s+/g, '');
+      }
+    })
+    .join('\n');
+}
+
+/**
+ * Convert full-width punctuation, digits, letters and spaces to their half-width forms
+ */
+function convertFullWidthToHalfWidth(text: string): string {
+  // Mapping from full-width characters to half-width characters
+  const fullWidthToHalfWidth = {
+    // Punctuation
+    '，': ',',
+    '。': '.',
+    '！': '!',
+    '？': '?',
+    '；': ';',
+    '：': ':',
+    '（': '(',
+    '）': ')',
+    '【': '[',
+    '】': ']',
+    '｛': '{',
+    '｝': '}',
+    '"': '"',
+    "'": "'",
+    '《': '<',
+    '》': '>',
+    '〈': '<',
+    '〉': '>',
+    '…': '...',
+    '—': '-',
+    '——': '--',
+    '·': '.',
+    // Digits
+    '０': '0',
+    '１': '1',
+    '２': '2',
+    '３': '3',
+    '４': '4',
+    '５': '5',
+    '６': '6',
+    '７': '7',
+    '８': '8',
+    '９': '9',
+    // Letters
+    ａ: 'a',
+    ｂ: 'b',
+    ｃ: 'c',
+    ｄ: 'd',
+    ｅ: 'e',
+    ｆ: 'f',
+    ｇ: 'g',
+    ｈ: 'h',
+    ｉ: 'i',
+    ｊ: 'j',
+    ｋ: 'k',
+    ｌ: 'l',
+    ｍ: 'm',
+    ｎ: 'n',
+    ｏ: 'o',
+    ｐ: 'p',
+    ｑ: 'q',
+    ｒ: 'r',
+    ｓ: 's',
+    ｔ: 't',
+    ｕ: 'u',
+    ｖ: 'v',
+    ｗ: 'w',
+    ｘ: 'x',
+    ｙ: 'y',
+    ｚ: 'z',
+    Ａ: 'A',
+    Ｂ: 'B',
+    Ｃ: 'C',
+    Ｄ: 'D',
+    Ｅ: 'E',
+    Ｆ: 'F',
+    Ｇ: 'G',
+    Ｈ: 'H',
+    Ｉ: 'I',
+    Ｊ: 'J',
+    Ｋ: 'K',
+    Ｌ: 'L',
+    Ｍ: 'M',
+    Ｎ: 'N',
+    Ｏ: 'O',
+    Ｐ: 'P',
+    Ｑ: 'Q',
+    Ｒ: 'R',
+    Ｓ: 'S',
+    Ｔ: 'T',
+    Ｕ: 'U',
+    Ｖ: 'V',
+    Ｗ: 'W',
+    Ｘ: 'X',
+    Ｙ: 'Y',
+    Ｚ: 'Z',
+    // Space
+    '　': ' '
+  };
+
+  // Match every single-char key: U+FF00-FFEF plus U+3000-303F, U+2014, U+2026 and U+00B7
+  return text.replace(/[\uff00-\uffef\u3000-\u303f\u2014\u2026\u00b7]/g, (char) => {
+    return fullWidthToHalfWidth[char as keyof typeof fullWidthToHalfWidth] || char;
+  });
+}
+
+/**
+ * Intelligently handle spaces in mixed Chinese/English text
+ * Keeps necessary separation but deletes extra spaces
+ */
+function removeIntelligentSpaces(text: string): string {
+  const lines = text.split('\n');
+  return lines
+    .map((line) => {
+      if (isTableRow(line)) {
+        // Table row: keep the table structure, but handle each cell's content intelligently
+        return line
+          .split('|')
+          .map((cell) => {
+            // Keep a single space between English words, delete other extra whitespace
+            const processedCell = cell
+              .replace(/\s+/g, ' ') // collapse whitespace runs into one space
+              // Remove spaces between English and non-ASCII text, keep those between English words
+              // eslint-disable-next-line no-control-regex
+              .replace(/([a-zA-Z]+)\s+([^\x00-\x7F]+)/g, '$1$2') // English then non-ASCII: drop space
+              // eslint-disable-next-line no-control-regex
+              .replace(/([^\x00-\x7F]+)\s+([a-zA-Z]+)/g, '$1$2'); // non-ASCII then English: drop space
+
+            // Determine whether the content is pure ASCII
+            // eslint-disable-next-line no-control-regex
+            const hasNonEnglish = /[^\x00-\x7F]/.test(processedCell);
+
+            if (!hasNonEnglish) {
+              // Pure ASCII: keep a single space between words
+              return processedCell.replace(/\s+/g, ' ').trim();
+            } else {
+              // Contains non-ASCII (e.g. Chinese) characters: delete all remaining whitespace
+              return processedCell.replace(/\s+/g, '').trim();
+            }
+          })
+          .join('|');
+      } else {
+        // Ordinary line: handle spaces intelligently
+        const processedLine = line
+          .replace(/\s+/g, ' ') // collapse whitespace runs into one space
+          // Remove spaces between English and non-ASCII text, keep those between English words
+          // eslint-disable-next-line no-control-regex
+          .replace(/([a-zA-Z]+)\s+([^\x00-\x7F]+)/g, '$1$2') // English then non-ASCII: drop space
+          // eslint-disable-next-line no-control-regex
+          .replace(/([^\x00-\x7F]+)\s+([a-zA-Z]+)/g, '$1$2'); // non-ASCII then English: drop space
+
+        // Determine whether the content is pure ASCII
+        // eslint-disable-next-line no-control-regex
+        const hasNonEnglish = /[^\x00-\x7F]/.test(processedLine);
+
+        if (!hasNonEnglish) {
+          // Pure ASCII: keep a single space between words
+          return processedLine.replace(/\s+/g, ' ').trim();
+        } else {
+          // Contains non-ASCII (e.g. Chinese) characters: delete all remaining whitespace
+          return processedLine.replace(/\s+/g, '').trim();
+        }
+      }
+    })
+    .join('\n');
+}
+
+/**
+ * Apply the full normalization pipeline
+ */
+export function applyFullNormalization(text: string): string {
+  // Use the default normalization configuration
+  return normalizeText(text, {
+    removeMarkdownFormatting: true,
+    preserveTables: true,
+    removeExtraSpaces: true,
+    removeTextSpaces: false,
+    removeIntelligentSpaces: true,
+    convertPunctuation: true
+  });
+}
diff --git a/modules/tool/packages/docDiff/test/diffAlgorithm.test.ts b/modules/tool/packages/docDiff/test/diffAlgorithm.test.ts
new file mode 100644
index 00000000..4dc619ba
--- /dev/null
+++ b/modules/tool/packages/docDiff/test/diffAlgorithm.test.ts
@@ -0,0 +1,398 @@
+import { describe, it, expect } from 'vitest';
+import {
+  calculateSimilarity,
+  isHighSimilarity,
+  isMediumSimilarity,
+  buildMatchMatrix,
+  backtrackLCS,
+  compareDocuments,
+  splitIntoLines
+} from '../src/diffAlgorithm';
+
+describe('灵敏相似度 Diff 算法核心功能测试', () => {
+  describe('calculateSimilarity', () => {
+    it('应该正确计算完全相同的文本相似度', () => {
+      expect(calculateSimilarity('hello', 'hello')).toBe(1.0);
+      expect(calculateSimilarity('相同内容', '相同内容')).toBe(1.0);
+      expect(calculateSimilarity('', '')).toBe(1.0);
+    });
+
+    it('应该正确计算空文本的相似度', () => {
+      expect(calculateSimilarity('hello', '')).toBe(0.0);
+      expect(calculateSimilarity('', 'world')).toBe(0.0);
+    });
+
+    it('应该对空格变化敏感', () => {
+      const sim = calculateSimilarity('hello world', 'hello  world');
+      expect(sim).toBeGreaterThan(0.9); // one extra space: similarity should stay very high
+      expect(sim).toBeLessThan(1.0);
+    });
+
+    it('应该对标点符号变化敏感', () => {
+      const sim = calculateSimilarity('你好，世界', '你好！世界');
+      expect(sim).toBeGreaterThan(0.7); // punctuation changed: similarity should still be fairly high
+      expect(sim).toBeLessThan(1.0);
+    });
+
+    it('应该对大小写变化敏感', () => {
+      const sim = calculateSimilarity('Hello', 'hello');
+      expect(sim).toBeGreaterThan(0.7); // letter case changed: similarity should still be fairly high
+      expect(sim).toBeLessThan(1.0);
+    });
+
+    it('应该正确计算大幅修改的相似度', () => {
+      const sim = calculateSimilarity('hello world', 'completely different');
+      expect(sim).toBeLessThan(0.5); // heavily rewritten: similarity should be low
+    });
+  });
+
+  describe('isHighSimilarity 和 isMediumSimilarity', () => {
+    it('应该正确识别高相似度', () => {
+      expect(isHighSimilarity(0.8)).toBe(true);
+      expect(isHighSimilarity(0.71)).toBe(true);
+      expect(isHighSimilarity(0.7)).toBe(false);
+      expect(isHighSimilarity(1.0)).toBe(false); // an exact match is its own category, not "high similarity"
+    });
+
+    it('应该正确识别中等相似度', () => {
+      expect(isMediumSimilarity(0.6)).toBe(false); // below the 0.7 threshold
+      expect(isMediumSimilarity(0.51)).toBe(false); // below the 0.7 threshold
+      expect(isMediumSimilarity(0.5)).toBe(false);
+      expect(isMediumSimilarity(0.8)).toBe(true);
+      expect(isMediumSimilarity(1.0)).toBe(true); // an exact match also counts as medium similarity
+    });
+  });
+
+  describe('splitIntoLines', () => {
+    it('应该正确分割文本行', () => {
+      const text = '第1行\n第2行\n第3行';
+      const lines = splitIntoLines(text);
+      expect(lines).toEqual(['第1行', '第2行', '第3行']); // one entry per "\n"-separated line
+    });
+
+    it('应该处理空行', () => {
+      const text = '第1行\n\n第3行';
+      const lines = splitIntoLines(text);
+      expect(lines).toEqual(['第1行', '', '第3行']); // the blank line is kept as an empty string
+    });
+  });
+
+  describe('buildMatchMatrix', () => {
+    it('应该为空文档构建正确大小的矩阵', () => {
+      const originalLines: string[] = [];
+      const modifiedLines: string[] = [];
+
+      const matrix = buildMatchMatrix(originalLines, modifiedLines);
+
+      expect(matrix).toHaveLength(1);
+      expect(matrix[0]).toHaveLength(1);
+      expect(matrix[0][0]).toBe(0);
+    });
+
+    it('应该构建正确大小的矩阵', () => {
+      const originalLines = ['a', 'b'];
+      const modifiedLines = ['a', 'b', 'c'];
+
+      const matrix = buildMatchMatrix(originalLines, modifiedLines);
+
+      expect(matrix).toHaveLength(3); // originalLines.length + 1
+      expect(matrix[0]).toHaveLength(4); // modifiedLines.length + 1
+    });
+
+    it('应该正确识别高相似度的行', () => {
+      const originalLines = ['第1行', '第2行'];
+      const modifiedLines = ['第1行', '第2行'];
+
+      const matrix = buildMatchMatrix(originalLines, modifiedLines);
+
+      // Identical lines should each increment the match count
+      expect(matrix[1][1]).toBe(1);
+      expect(matrix[2][2]).toBe(2);
+    });
+
+    it('应该识别中等相似度的行', () => {
+      const originalLines = ['hello world', 'test'];
+      const modifiedLines = ['hello  world', 'test']; // one extra space
+
+      const matrix = buildMatchMatrix(originalLines, modifiedLines);
+
+      // A similar-but-not-identical line should still increment the match count
+      expect(matrix[1][1]).toBe(1);
+      expect(matrix[2][2]).toBe(2);
+    });
+
+    it('应该忽略低相似度的行', () => {
+      const originalLines = ['hello', 'test'];
+      const modifiedLines = ['completely different', 'test'];
+
+      const matrix = buildMatchMatrix(originalLines, modifiedLines);
+
+      // A low-similarity pair must not increment the match count
+      expect(matrix[1][1]).toBe(0);
+      expect(matrix[2][2]).toBe(1);
+    });
+  });
+
+  describe('backtrackLCS', () => {
+    it('应该正确回溯高相似度的行', () => {
+      const originalLines = ['第1行', '第2行', '第3行'];
+      const modifiedLines = ['第1行', '第2行', '第3行'];
+
+      const matrix = buildMatchMatrix(originalLines, modifiedLines);
+      const { origIndices, modIndices } = backtrackLCS(matrix, originalLines, modifiedLines);
+
+      expect(origIndices).toEqual([0, 1, 2]);
+      expect(modIndices).toEqual([0, 1, 2]);
+    });
+
+    it('应该处理中等相似度的匹配', () => {
+      const originalLines = ['hello world', '第2行'];
+      const modifiedLines = ['hello  world', '第2行'];
+
+      const matrix = buildMatchMatrix(originalLines, modifiedLines);
+      const { origIndices, modIndices } = backtrackLCS(matrix, originalLines, modifiedLines);
+
+      // The first pair differs only by one space, so both pairs should be matched
+      expect(origIndices).toContain(0);
+      expect(modIndices).toContain(0);
+      expect(origIndices).toContain(1);
+      expect(modIndices).toContain(1);
+    });
+  });
+});
+
+describe('灵敏文档对比算法测试', () => {
+  describe('开头插入行的处理', () => {
+    it('应该正确识别在开头插入的单行', () => {
+      const original = '第1行\n第2行\n第3行';
+      const modified = '新插入行\n第1行\n第2行\n第3行';
+
+      const diffs = compareDocuments(original, modified);
+
+      // The newly inserted line should be reported as added
+      expect(diffs.some((diff) => diff.type === 'added' && diff.modified === '新插入行')).toBe(
+        true
+      );
+
+      // The lines after it should all be reported as unchanged
+      const unchangedDiffs = diffs.filter((diff) => diff.type === 'unchanged');
+      expect(unchangedDiffs.length).toBe(3);
+    });
+
+    it('应该正确处理开头插入多行的情况', () => {
+      const original = '第1行\n第2行';
+      const modified = '插入行A\n插入行B\n第1行\n第2行';
+
+      const diffs = compareDocuments(original, modified);
+
+      const addedDiffs = diffs.filter((diff) => diff.type === 'added');
+      expect(addedDiffs.length).toBe(2);
+
+      const unchangedDiffs = diffs.filter((diff) => diff.type === 'unchanged');
+      expect(unchangedDiffs.length).toBe(2);
+    });
+  });
+
+  describe('微小修改检测', () => {
+    it('应该将空格变化识别为修改', () => {
+      const original = 'Hello World';
+      const modified = 'Hello  World'; // one extra space
+
+      const diffs = compareDocuments(original, modified);
+
+      // Should be reported as a single modification, not a removal plus an addition
+      expect(diffs.length).toBe(1);
+      expect(diffs[0].type).toBe('modified');
+      expect(diffs[0].original).toBe('Hello World');
+      expect(diffs[0].modified).toBe('Hello  World');
+    });
+
+    it('应该将标点符号变化识别为修改', () => {
+      const original = '你好，世界';
+      const modified = '你好！世界';
+
+      const diffs = compareDocuments(original, modified);
+
+      // Should be reported as a modification
+      expect(diffs.length).toBe(1);
+      expect(diffs[0].type).toBe('modified');
+      expect(diffs[0].original).toBe('你好，世界');
+      expect(diffs[0].modified).toBe('你好！世界');
+    });
+
+    it('应该将大小写变化识别为修改', () => {
+      const original = 'Hello World';
+      const modified = 'hello world';
+
+      const diffs = compareDocuments(original, modified);
+
+      // Should be reported as a modification
+      expect(diffs.length).toBe(1);
+      expect(diffs[0].type).toBe('modified');
+    });
+  });
+
+  describe('大幅修改检测', () => {
+    it('应该将完全不同的内容识别为删除+新增', () => {
+      const original = 'Hello World';
+      const modified = 'Completely Different Text';
+
+      const diffs = compareDocuments(original, modified);
+
+      // Should be reported as a removal plus an addition, never as a modification
+      expect(diffs.some((diff) => diff.type === 'removed')).toBe(true);
+      expect(diffs.some((diff) => diff.type === 'added')).toBe(true);
+      expect(diffs.some((diff) => diff.type === 'modified')).toBe(false);
+    });
+
+    it('应该正确处理内容完全不同的场景', () => {
+      const original = '第1行\n第2行';
+      const modified = '完全不同的A行\n完全不同的B行';
+
+      const diffs = compareDocuments(original, modified);
+
+      const removedCount = diffs.filter((diff) => diff.type === 'removed').length;
+      const addedCount = diffs.filter((diff) => diff.type === 'added').length;
+      const modifiedCount = diffs.filter((diff) => diff.type === 'modified').length;
+
+      expect(removedCount).toBe(2);
+      expect(addedCount).toBe(2);
+      expect(modifiedCount).toBe(0);
+    });
+  });
+
+  describe('中间插入行的处理', () => {
+    it('应该正确识别中间插入的行', () => {
+      const original = '第1行\n第2行\n第3行\n第4行';
+      const modified = '第1行\n插入行A\n插入行B\n第2行\n第3行\n第4行';
+
+      const diffs = compareDocuments(original, modified);
+
+      const addedDiffs = diffs.filter((diff) => diff.type === 'added');
+      expect(addedDiffs.length).toBe(2); // the two lines inserted after the first line
+
+      const unchangedDiffs = diffs.filter((diff) => diff.type === 'unchanged');
+      expect(unchangedDiffs.length).toBe(4); // all four original lines survive untouched
+    });
+  });
+
+  describe('删除行的处理', () => {
+    it('应该正确识别删除的行', () => {
+      const original = '第1行\n要删除的行\n第3行';
+      const modified = '第1行\n第3行';
+
+      const diffs = compareDocuments(original, modified);
+
+      expect(diffs.some((diff) => diff.type === 'removed' && diff.original === '要删除的行')).toBe(
+        true
+      );
+
+      const unchangedDiffs = diffs.filter((diff) => diff.type === 'unchanged');
+      expect(unchangedDiffs.length).toBe(2); // the two lines around the removed one
+    });
+  });
+
+  describe('复杂场景的处理', () => {
+    it('应该正确处理各种修改类型混合的场景', () => {
+      const original = `第1行
+要删除的行
+第3行
+要微改的行
+第5行
+要大幅改的行`;
+
+      const modified = `插入的新行
+第1行
+第3行
+要微改的行${'  '}
+第5行
+完全不同的行`;
+
+      const diffs = compareDocuments(original, modified);
+
+      const addedCount = diffs.filter((diff) => diff.type === 'added').length;
+      const removedCount = diffs.filter((diff) => diff.type === 'removed').length;
+      const modifiedCount = diffs.filter((diff) => diff.type === 'modified').length;
+
+      expect(addedCount).toBe(2); // the inserted line + the completely different last line
+      expect(removedCount).toBe(2); // the deleted line + the heavily rewritten last line
+      expect(modifiedCount).toBe(1); // the slightly changed line (two trailing spaces added)
+    });
+  });
+
+  describe('边界情况处理', () => {
+    it('应该处理空文档对比', () => {
+      const original = '';
+      const modified = '新文档内容';
+
+      const diffs = compareDocuments(original, modified);
+
+      expect(diffs.some((diff) => diff.type === 'added')).toBe(true);
+    });
+
+    it('应该处理相同文档对比', () => {
+      const text = '第1行\n第2行\n第3行';
+
+      const diffs = compareDocuments(text, text);
+
+      // Every line should be reported as unchanged
+      expect(diffs.every((diff) => diff.type === 'unchanged')).toBe(true);
+    });
+
+    it('应该处理只有空行的文档', () => {
+      const original = '\n\n\n';
+      const modified = '\n\n\n\n';
+
+      const diffs = compareDocuments(original, modified);
+
+      // Should complete without throwing and still produce entries
+      expect(diffs.length).toBeGreaterThan(0);
+    });
+  });
+
+  describe('性能测试', () => {
+    it('应该在合理时间内处理大文档', () => {
+      const largeOriginal = Array.from({ length: 500 }, (_, i) => `第${i + 1}行`).join('\n');
+      const largeModified = largeOriginal + '\n新增的最后行';
+
+      const startTime = Date.now();
+      const diffs = compareDocuments(largeOriginal, largeModified);
+      const endTime = Date.now();
+
+      expect(diffs.length).toBeGreaterThan(0);
+      expect(endTime - startTime).toBeLessThan(2000); // should finish within 2 s (wall-clock bound; NOTE(review): may be flaky on slow CI)
+    });
+  });
+
+  describe('特殊字符处理', () => {
+    it('应该正确处理包含特殊字符的微小修改', () => {
+      const original = '包含特殊字符的文本: <>&"\'';
+      const modified = '包含特殊字符的文本: <>&"\' '; // one extra trailing space
+
+      const diffs = compareDocuments(original, modified);
+
+      // Should be reported as a modification
+      expect(diffs.length).toBe(1);
+      expect(diffs[0].type).toBe('modified');
+    });
+
+    it('应该正确处理Unicode字符', () => {
+      const original = '包含Unicode: 🚀 🌟 测试中文';
+      const modified = '包含Unicode: 🎉 🌟 测试中文';
+
+      const diffs = compareDocuments(original, modified);
+
+      // Should be reported as a modification (one emoji differs, the rest is the same)
+      expect(diffs.some((diff) => diff.type === 'modified')).toBe(true);
+    });
+
+    it('应该处理不同语言的文本', () => {
+      const original = 'Hello world\n你好世界\nこんにちは';
+      const modified = 'Hello world\n你好世界！\nこんにちは'; // punctuation mark appended to line 2
+
+      const diffs = compareDocuments(original, modified);
+
+      expect(diffs.some((diff) => diff.type === 'modified')).toBe(true);
+    });
+  });
+});
diff --git a/modules/tool/packages/docDiff/test/integration-tolerance.test.ts b/modules/tool/packages/docDiff/test/integration-tolerance.test.ts
new file mode 100644
index 00000000..8e890811
--- /dev/null
+++ b/modules/tool/packages/docDiff/test/integration-tolerance.test.ts
@@ -0,0 +1,91 @@
+import { describe, it, expect } from 'bun:test';
+import { compareDocumentsWithTolerance } from '../src/diffAlgorithm';
+
+describe('增强的换行容差集成测试', () => {
+  it('应该处理OCR和docx之间的换行差异', () => {
+    const docxText = `这是完整的句子。
+这是另一个完整的句子，包含多个词语和标点符号。
+第三行也是完整的。`;
+
+    const ocrText = `这是完整的 句子。
+这是另一个 完整的句子，包含多个词语 和 标点符号。
+第三行 也是 完整的。`;
+
+    const diffs = compareDocumentsWithTolerance(docxText, ocrText, {
+      enableLineBreakTolerance: true,
+      scanRange: 2,
+      toleranceThreshold: 0.9
+    });
+
+    console.log('Diff结果:');
+    diffs.forEach((diff, index) => {
+      console.log(`${index + 1}. ${diff.type}: "${diff.original}" -> "${diff.modified}"`);
+    });
+
+    // Collect the entries that were reported as unchanged
+    const unchangedDiffs = diffs.filter((diff) => diff.type === 'unchanged');
+
+    // With tolerance applied, at least some lines should be treated as identical
+    expect(unchangedDiffs.length).toBeGreaterThan(0);
+  });
+
+  it('应该处理文档开头和结尾的换行差异', () => {
+    // Case 1: line-break difference at the start of the document
+    const original1 = `第一行
+第二行`;
+    const modified1 = `第一行第二行`;
+
+    const diffs1 = compareDocumentsWithTolerance(original1, modified1);
+
+    // Case 2: difference at the end of the document
+    const original2 = `第一行
+第二行`;
+    const modified2 = `第一行
+第二行第三行`;
+
+    const diffs2 = compareDocumentsWithTolerance(original2, modified2);
+
+    // Both cases should produce a non-empty result
+    expect(diffs1.length).toBeGreaterThan(0);
+    expect(diffs2.length).toBeGreaterThan(0);
+
+    // Line-break tolerance should keep the number of "modified" entries small
+    const modifiedDiffs1 = diffs1.filter((diff) => diff.type === 'modified');
+    const modifiedDiffs2 = diffs2.filter((diff) => diff.type === 'modified');
+
+    expect(modifiedDiffs1.length).toBeLessThan(3);
+    expect(modifiedDiffs2.length).toBeLessThan(3);
+  });
+
+  it('应该处理完整的OCR文档场景', () => {
+    const ocrText = `这 是 OCR 识 别 的 文本。
+第 二行  继续测试，有 额外 空格。
+这是 第三行，包 含全 角标点符号！
+第 四行也是正 常内容。`;
+
+    const cleanText = `这是OCR识别的文本。
+第二行继续测试，有额外空格。
+这是第三行，包含全角标点符号!
+第四行也是正常内容。`;
+
+    const diffs = compareDocumentsWithTolerance(ocrText, cleanText, {
+      enableLineBreakTolerance: true,
+      scanRange: 3,
+      toleranceThreshold: 0.95
+    });
+
+    console.log('OCR场景测试结果:');
+    diffs.forEach((diff, index) => {
+      console.log(`${index + 1}. ${diff.type}: "${diff.original}" -> "${diff.modified}"`);
+    });
+
+    // Count the diff entries by type
+    const unchangedDiffs = diffs.filter((diff) => diff.type === 'unchanged');
+    const modifiedDiffs = diffs.filter((diff) => diff.type === 'modified');
+
+    console.log(`统计: ${unchangedDiffs.length}个相同, ${modifiedDiffs.length}个修改`);
+
+    // Tolerance handling should leave at least some lines reported as unchanged
+    expect(unchangedDiffs.length).toBeGreaterThan(0);
+  });
+});
diff --git a/modules/tool/packages/docDiff/test/integration.test.ts b/modules/tool/packages/docDiff/test/integration.test.ts
new file mode 100644
index 00000000..6dac9cd3
--- /dev/null
+++ b/modules/tool/packages/docDiff/test/integration.test.ts
@@ -0,0 +1,70 @@
+import { describe, it, expect } from 'vitest';
+import { tool } from '../src/index';
+
+describe('docDiff 工具集成测试', () => {
+  it('应该正确处理开头插入行的场景', async () => {
+    const result = await tool({
+      originalText: `第1行
+第2行
+第3行`,
+      modifiedText: `新插入的行
+第1行
+第2行
+第3行`,
+      title: '开头插入测试'
+    });
+
+    expect(result).toHaveProperty('htmlUrl');
+    expect(result).toHaveProperty('diffs');
+    expect(Array.isArray(result.diffs)).toBe(true);
+
+    // Verify that exactly the inserted line was detected as added
+    const addedDiffs = result.diffs.filter((diff) => diff.type === 'added');
+    expect(addedDiffs.length).toBe(1);
+    expect(addedDiffs[0].modified).toBe('新插入的行');
+  });
+
+  it('应该正确处理复杂修改场景', async () => {
+    const result = await tool({
+      originalText: `这是原始文档的第一行
+这是要修改的行
+这是第三行`,
+      modifiedText: `这是原始文档的第一行
+这是修改后的行
+这是新增的行
+这是第三行`,
+      title: '复杂场景测试'
+    });
+
+    expect(result).toHaveProperty('htmlUrl');
+    expect(result.diffs.length).toBeGreaterThan(0);
+
+    const types = result.diffs.map((diff) => diff.type);
+    // In strict mode we expect added and removed entries, but no "modified" entries
+    expect(types).toContain('added');
+    expect(types).toContain('removed');
+    expect(types).not.toContain('modified');
+  });
+
+  it('应该能处理只有一行的文档对比', async () => {
+    const result = await tool({
+      originalText: '单行内容',
+      modifiedText: '修改后的单行内容',
+      title: '单行文档测试'
+    });
+
+    expect(result).toHaveProperty('htmlUrl');
+    expect(result.diffs.length).toBeGreaterThan(0); // the texts differ, so at least one entry is expected
+  });
+
+  it('应该能处理相同文档', async () => {
+    const result = await tool({
+      originalText: '相同内容',
+      modifiedText: '相同内容',
+      title: '相同文档测试'
+    });
+
+    expect(result).toHaveProperty('htmlUrl');
+    expect(result.diffs.length).toBe(0); // NOTE(review): implies tool() omits unchanged entries from diffs — confirm
+  });
+});
diff --git a/modules/tool/packages/docDiff/test/lineBreakTolerance.test.ts b/modules/tool/packages/docDiff/test/lineBreakTolerance.test.ts
new file mode 100644
index 00000000..63a542e9
--- /dev/null
+++ b/modules/tool/packages/docDiff/test/lineBreakTolerance.test.ts
@@ -0,0 +1,393 @@
+import { describe, it, expect } from 'bun:test';
+import {
+  compareWithLineBreakTolerance,
+  compareDocumentsWithTolerance,
+  type LineBreakToleranceOptions
+} from '../src/diffAlgorithm';
+
+describe('换行容差功能', () => {
+  const defaultOptions: LineBreakToleranceOptions = {
+    enableLineBreakTolerance: true, // turn the line-break tolerance logic on
+    scanRange: 3, // scan range, in lines
+    toleranceThreshold: 0.95 // tolerance threshold
+  };
+
+  describe('compareWithLineBreakTolerance', () => {
+    it('应该检测到完全相同的行', () => {
+      const originalLines = ['这是第一行', '这是第二行'];
+      const modifiedLines = ['这是第一行', '这是第二行'];
+
+      const result = compareWithLineBreakTolerance(
+        '这是第一行',
+        '这是第一行',
+        originalLines,
+        modifiedLines,
+        0,
+        0,
+        defaultOptions
+      );
+
+      expect(result).toBe(true);
+    });
+
+    it('应该检测到换行差异：单行拆分为多行', () => {
+      const originalLines = ['这是一整行文本'];
+      const modifiedLines = ['这是一整行', '文本'];
+
+      const result = compareWithLineBreakTolerance(
+        '这是一整行', // NOTE(review): differs from originalLines[0] ('这是一整行文本') — confirm this is intended
+        '这是一整行',
+        originalLines,
+        modifiedLines,
+        0,
+        0,
+        defaultOptions
+      );
+
+      expect(result).toBe(true);
+    });
+
+    it('应该检测到换行差异：多行合并为单行', () => {
+      const originalLines = ['这是第一行', '这是第二行'];
+      const modifiedLines = ['这是第一行这是第二行'];
+
+      const result = compareWithLineBreakTolerance(
+        '这是第一行',
+        '这是第一行这是第二行',
+        originalLines,
+        modifiedLines,
+        0,
+        0,
+        defaultOptions
+      );
+
+      expect(result).toBe(true);
+    });
+
+    it('应该检测到换行差异：复杂的多行重组', () => {
+      const originalLines = [
+        '函数的参数列表包括：',
+        'name：用户名',
+        'age：年龄',
+        'email：邮箱地址'
+      ];
+      const modifiedLines = ['函数的参数列表包括：name：用户名age：年龄email：邮箱地址'];
+
+      const result = compareWithLineBreakTolerance(
+        '函数的参数列表包括：',
+        '函数的参数列表包括：name：用户名age：年龄email：邮箱地址',
+        originalLines,
+        modifiedLines,
+        0,
+        0,
+        defaultOptions
+      );
+
+      expect(result).toBe(true);
+    });
+
+    it('应该对高相似度的内容应用容差', () => {
+      const originalLines = ['这是原始文本内容'];
+      const modifiedLines = ['这是原始文本文内容']; // small difference: one extra character
+
+      const result = compareWithLineBreakTolerance(
+        '这是原始文本内容',
+        '这是原始文本文内容',
+        originalLines,
+        modifiedLines,
+        0,
+        0,
+        defaultOptions
+      );
+
+      expect(result).toBe(true); // expects similarity above the 0.95 threshold — NOTE(review): confirm against the similarity metric
+    });
+
+    it('应该对差异过大的内容不应用容差', () => {
+      const originalLines = ['这是第一段内容'];
+      const modifiedLines = ['这是完全不同的第二段内容'];
+
+      const result = compareWithLineBreakTolerance(
+        '这是第一段内容',
+        '这是完全不同的第二段内容',
+        originalLines,
+        modifiedLines,
+        0,
+        0,
+        defaultOptions
+      );
+
+      expect(result).toBe(false);
+    });
+
+    it('应该在禁用容差时返回 false', () => {
+      const originalLines = ['这是一整行文本'];
+      const modifiedLines = ['这是一整行', '文本'];
+
+      const result = compareWithLineBreakTolerance(
+        '这是一整行',
+        '这是一整行',
+        originalLines,
+        modifiedLines,
+        0,
+        0,
+        { ...defaultOptions, enableLineBreakTolerance: false }
+      );
+
+      expect(result).toBe(false); // tolerance is disabled, so false is expected even for equal line arguments
+    });
+
+    it('应该正确处理扫描范围限制', () => {
+      const originalLines = ['第1行', '第2行', '第3行', '第4行', '第5行'];
+      const modifiedLines = ['第1行', '第2行第3行第4行第5行'];
+
+      const result = compareWithLineBreakTolerance(
+        '第2行',
+        '第2行第3行第4行第5行',
+        originalLines,
+        modifiedLines,
+        1, // start from the 2nd original line (index 1)
+        0,
+        { ...defaultOptions, scanRange: 2 }
+      );
+
+      expect(result).toBe(true);
+    });
+
+    it('应该处理大小写混合的内容', () => {
+      const originalLines = ['HelloWorld Test'];
+      const modifiedLines = ['hello', 'world', 'test'];
+
+      const result = compareWithLineBreakTolerance(
+        'HelloWorld Test',
+        'hello',
+        originalLines,
+        modifiedLines,
+        0,
+        0,
+        defaultOptions
+      );
+
+      expect(result).toBe(true); // expected to match once both sides are lowercased
+    });
+  });
+
+  describe('compareDocumentsWithTolerance', () => {
+    it('应该处理简单的换行差异', () => {
+      const original = `这是第一段文本。
+这是第二段文本。`;
+      const modified = `这是第一段文本。这是第二段文本。`;
+
+      const diffs = compareDocumentsWithTolerance(original, modified, defaultOptions);
+
+      // Both original lines should be reported as unchanged (line-break tolerance kicks in)
+      expect(diffs.length).toBe(2);
+      expect(diffs[0].type).toBe('unchanged');
+      expect(diffs[1].type).toBe('unchanged');
+    });
+
+    it('应该保持对真正修改的检测', () => {
+      const original = `这是原始文本。
+这是另一段文本。`;
+      const modified = `这是修改后的文本。
+这是另一段文本。`;
+
+      const diffs = compareDocumentsWithTolerance(original, modified, defaultOptions);
+
+      // The first paragraph should be reported as modified (its content really differs)
+      expect(diffs.some((diff) => diff.type === 'modified')).toBe(true);
+      expect(diffs.some((diff) => diff.original === '这是原始文本。')).toBe(true);
+      expect(diffs.some((diff) => diff.modified === '这是修改后的文本。')).toBe(true);
+    });
+
+    it('应该处理复杂的文档结构', () => {
+      const original = `标题
+
+第一章
+这是第一章的内容。
+
+第二章
+这是第二章的内容。
+
+结论
+文档结束。`;
+
+      const modified = `标题
+
+第一章这是第一章的内容。
+
+第二章这是第二章的内容。
+
+结论文档结束。`;
+
+      const diffs = compareDocumentsWithTolerance(original, modified, defaultOptions);
+
+      // The title line should be reported as unchanged
+      expect(diffs.some((diff) => diff.type === 'unchanged' && diff.original === '标题')).toBe(
+        true
+      );
+
+      // The merged chapter lines should mostly be absorbed by the tolerance logic
+      expect(diffs.filter((diff) => diff.type === 'modified').length).toBeLessThan(3);
+    });
+
+    it('应该处理混合换行和内容修改', () => {
+      const original = `第一段内容。
+第二段内容。
+第三段内容。`;
+
+      const modified = `第一段修改后的内容。第二段内容。
+第三段内容。`;
+
+      const diffs = compareDocumentsWithTolerance(original, modified, defaultOptions);
+
+      // The change to the first paragraph should be detected as a modification
+      expect(
+        diffs.some(
+          (diff) =>
+            diff.type === 'modified' &&
+            diff.original === '第一段内容。' &&
+            diff.modified === '第一段修改后的内容。第二段内容。'
+        )
+      ).toBe(true);
+    });
+
+    it('应该在禁用容差时使用严格比较', () => {
+      const original = `这是第一行。
+这是第二行。`;
+      const modified = `这是第一行。这是第二行。`;
+
+      const strictDiffs = compareDocumentsWithTolerance(original, modified, {
+        ...defaultOptions,
+        enableLineBreakTolerance: false
+      });
+
+      const tolerantDiffs = compareDocumentsWithTolerance(original, modified, {
+        ...defaultOptions,
+        enableLineBreakTolerance: true
+      });
+
+      // Disabling tolerance should yield more diff entries than enabling it
+      expect(strictDiffs.length).toBeGreaterThan(tolerantDiffs.length);
+    });
+
+    it('应该处理空内容', () => {
+      const original = '';
+      const modified = '';
+
+      const diffs = compareDocumentsWithTolerance(original, modified, defaultOptions);
+
+      expect(diffs.length).toBe(0);
+    });
+
+    it('应该处理单行文档', () => {
+      const original = '这是单行文本';
+      const modified = '这是单行文本';
+
+      const diffs = compareDocumentsWithTolerance(original, modified, defaultOptions);
+
+      expect(diffs.length).toBe(1);
+      expect(diffs[0].type).toBe('unchanged');
+    });
+
+    it('应该处理大量换行符的情况', () => {
+      const original = `第一段
+
+
+
+第二段`;
+
+      const modified = `第一段
+第二段`;
+
+      const diffs = compareDocumentsWithTolerance(original, modified, defaultOptions);
+
+      // The surplus blank lines must not stop the two paragraphs from matching
+      expect(diffs.some((diff) => diff.type === 'unchanged' && diff.original === '第一段')).toBe(
+        true
+      );
+      expect(diffs.some((diff) => diff.type === 'unchanged' && diff.original === '第二段')).toBe(
+        true
+      );
+    });
+  });
+
+  describe('性能测试', () => {
+    it('应该在合理时间内处理大文档', () => {
+      // Build a large document pair; both arrays hold plain text lines
+      const originalLines: string[] = [];
+      const modifiedLines: string[] = [];
+
+      for (let i = 0; i < 100; i++) {
+        originalLines.push(`这是第${i}段文本内容。`);
+        if (i % 10 === 0) {
+          // Every 10th segment is merged with its successor on the modified side only
+          modifiedLines.push(`这是第${i}段文本内容。这是第${i + 1}段文本内容。`);
+          originalLines.push(`这是第${++i}段文本内容。`); // original keeps it as a separate line
+        } else {
+          modifiedLines.push(`这是第${i}段文本内容。`);
+        }
+      }
+
+      const original = originalLines.join('\n');
+      const modified = modifiedLines.join('\n');
+
+      const startTime = Date.now();
+      const diffs = compareDocumentsWithTolerance(original, modified, defaultOptions);
+      const endTime = Date.now();
+
+      // Should finish within a reasonable time (under 2 seconds, wall-clock)
+      expect(endTime - startTime).toBeLessThan(2000);
+      expect(diffs.length).toBeGreaterThan(0);
+    });
+  });
+
+  describe('边界情况', () => {
+    it('应该处理完全相同的文档', () => {
+      const text = `这是相同的文档内容。
+没有任何差异。`;
+
+      const diffs = compareDocumentsWithTolerance(text, text, defaultOptions);
+
+      expect(diffs.every((diff) => diff.type === 'unchanged')).toBe(true);
+    });
+
+    it('应该处理完全不同的文档', () => {
+      const original = '这是原始文档';
+      const modified = '这是完全不同的文档';
+
+      const diffs = compareDocumentsWithTolerance(original, modified, defaultOptions);
+
+      // A modification should be detected
+      expect(diffs.some((diff) => diff.type === 'modified')).toBe(true);
+    });
+
+    it('应该处理只包含空格的行', () => {
+      const original = `第一行
+
+第三行`;
+      const modified = `第一行第三行`;
+
+      const diffs = compareDocumentsWithTolerance(original, modified, defaultOptions);
+
+      // The blank line should be absorbed by the tolerance logic
+      expect(diffs.some((diff) => diff.type === 'unchanged' && diff.original === '第一行')).toBe(
+        true
+      );
+      expect(diffs.some((diff) => diff.type === 'unchanged' && diff.original === '第三行')).toBe(
+        true
+      );
+    });
+
+    it('应该处理特殊字符', () => {
+      const original = `特殊字符：!@#$%^&*()
+中文标点：，。！？`;
+      const modified = `特殊字符：!@#$%^&*()中文标点：，。！？`;
+
+      const diffs = compareDocumentsWithTolerance(original, modified, defaultOptions);
+
+      // The line break should be handled by the tolerance logic; only a non-empty result is asserted
+      expect(diffs.length).toBeGreaterThanOrEqual(1);
+    });
+  });
+});
diff --git a/modules/tool/packages/docDiff/test/lineTolerance.test.ts b/modules/tool/packages/docDiff/test/lineTolerance.test.ts
new file mode 100644
index 00000000..5c6ad5df
--- /dev/null
+++ b/modules/tool/packages/docDiff/test/lineTolerance.test.ts
@@ -0,0 +1,378 @@
+import { describe, it, expect } from 'bun:test';
+import {
+  compareWithLineBreakTolerance,
+  compareDocumentsWithTolerance,
+  type LineBreakToleranceOptions
+} from '../src/diffAlgorithm';
+import type { ParagraphDiff } from '../src/diffAlgorithm';
+
+describe('换行容差算法', () => {
+  describe('compareWithLineBreakTolerance', () => { // Line-level checks of the boolean returned by compareWithLineBreakTolerance
+    const defaultOptions: LineBreakToleranceOptions = { // shared options; individual tests override single fields via spread
+      enableLineBreakTolerance: true,
+      scanRange: 3,
+      toleranceThreshold: 0.95
+    };
+
+    it('应该识别完全相同的行', () => { // title: should recognise completely identical lines
+      const originalLine = '这是相同的文本';
+      const modifiedLine = '这是相同的文本';
+      const originalLines = ['这是相同的文本', '下一行'];
+      const modifiedLines = ['这是相同的文本', '下一行'];
+
+      const result = compareWithLineBreakTolerance(
+        originalLine,
+        modifiedLine,
+        originalLines,
+        modifiedLines,
+        0, // presumably the position of originalLine within originalLines — TODO confirm against the signature
+        0, // presumably the position of modifiedLine within modifiedLines — TODO confirm against the signature
+        defaultOptions
+      );
+
+      expect(result).toBe(true);
+    });
+
+    it('应该识别换行差异（两行合并为一行）', () => { // title: should recognise a line-break difference (two lines merged into one)
+      const originalLines = ['第一行', '第二行', '第三行'];
+      const modifiedLines = ['第一行第二行', '第三行'];
+
+      // Compare the first line of the original document against the first line of the modified document.
+      const result = compareWithLineBreakTolerance(
+        '第一行',
+        '第一行第二行',
+        originalLines,
+        modifiedLines,
+        0,
+        0,
+        defaultOptions
+      );
+
+      expect(result).toBe(true);
+    });
+
+    it('应该识别换行差异（一行拆分为两行）', () => { // title: should recognise a line-break difference (one line split into two)
+      const originalLines = ['第一行第二行', '第三行'];
+      const modifiedLines = ['第一行', '第二行', '第三行'];
+
+      // Compare the first line of the original document against the first line of the modified document.
+      const result = compareWithLineBreakTolerance(
+        '第一行第二行',
+        '第一行',
+        originalLines,
+        modifiedLines,
+        0,
+        0,
+        defaultOptions
+      );
+
+      expect(result).toBe(true);
+    });
+
+    it('应该处理复杂的换行重组', () => { // title: should handle a more complex line-break regrouping
+      const originalLines = ['这是第一段文本，', '内容比较长。', '这是第二段文本。'];
+      const modifiedLines = ['这是第一段文本，内容比较长。', '这是第二段文本。'];
+
+      const result = compareWithLineBreakTolerance(
+        '这是第一段文本，',
+        '这是第一段文本，内容比较长。',
+        originalLines,
+        modifiedLines,
+        0,
+        0,
+        defaultOptions
+      );
+
+      expect(result).toBe(true);
+    });
+
+    it('应该处理空格差异的换行容差', () => { // title: should handle line-break tolerance combined with whitespace differences
+      const originalLines = ['单词1  单词2', '单词3']; // the first entry contains two spaces where the modified side has a line break
+      const modifiedLines = ['单词1', '单词2', '单词3'];
+
+      const result = compareWithLineBreakTolerance(
+        '单词1  单词2',
+        '单词1',
+        originalLines,
+        modifiedLines,
+        0,
+        0,
+        defaultOptions
+      );
+
+      expect(result).toBe(true);
+    });
+
+    it('应该在禁用时返回 false', () => { // title: should return false when the feature is disabled
+      const originalLine = '第一行';
+      const modifiedLine = '第一行第二行';
+      const originalLines = ['第一行', '第二行'];
+      const modifiedLines = ['第一行第二行'];
+
+      const result = compareWithLineBreakTolerance(
+        originalLine,
+        modifiedLine,
+        originalLines,
+        modifiedLines,
+        0,
+        0,
+        { ...defaultOptions, enableLineBreakTolerance: false } // same merged-line input as above, but with tolerance switched off
+      );
+
+      expect(result).toBe(false);
+    });
+
+    it('应该拒绝真正的差异', () => { // title: should reject genuine differences
+      const originalLine = '这是原始文本';
+      const modifiedLine = '这是完全不同的文本';
+      const originalLines = ['这是原始文本', '其他内容'];
+      const modifiedLines = ['这是完全不同的文本', '其他内容'];
+
+      const result = compareWithLineBreakTolerance(
+        originalLine,
+        modifiedLine,
+        originalLines,
+        modifiedLines,
+        0,
+        0,
+        defaultOptions
+      );
+
+      expect(result).toBe(false);
+    });
+
+    it('应该处理扫描范围边界', () => { // title: should handle scan-range boundaries
+      const originalLines = Array.from({ length: 10 }, (_, i) => `行${i}`);
+      const modifiedLines = Array.from({ length: 10 }, (_, i) => `行${i}`);
+
+      // Probe a position in the middle of the arrays. NOTE(review): both lines are identical, so scanRange may not actually be exercised — confirm.
+      const result = compareWithLineBreakTolerance(
+        '行5',
+        '行5',
+        originalLines,
+        modifiedLines,
+        5,
+        5,
+        { ...defaultOptions, scanRange: 2 }
+      );
+
+      expect(result).toBe(true);
+    });
+  });
+
+  describe('compareDocumentsWithTolerance', () => { // Document-level checks on the list of diffs returned for two multi-line strings
+    it('应该处理简单的换行差异', () => { // title: should handle a simple line-break difference
+      const original = `第一行
+第二行
+第三行`;
+
+      const modified = `第一行第二行
+第三行`;
+
+      const diffs = compareDocumentsWithTolerance(original, modified);
+
+      // Expect exactly two entries, both 'unchanged' (the merged line is absorbed by line-break tolerance).
+      expect(diffs.length).toBe(2);
+      expect(diffs[0].type).toBe('unchanged');
+      expect(diffs[1].type).toBe('unchanged');
+    });
+
+    it('应该处理一行拆分为多行', () => { // title: should handle one line split into several lines
+      const original = `第一行第二行
+第三行`;
+
+      const modified = `第一行
+第二行
+第三行`;
+
+      const diffs = compareDocumentsWithTolerance(original, modified);
+
+      // All three entries should be reported as 'unchanged' (line-break tolerance).
+      expect(diffs.length).toBe(3);
+      expect(diffs.every((diff) => diff.type === 'unchanged')).toBe(true);
+    });
+
+    it('应该保持对真实差异的敏感度', () => { // title: should stay sensitive to real differences
+      const original = `第一行
+第二行
+第三行`;
+
+      const modified = `第一行
+修改的第二行
+第三行`;
+
+      const diffs = compareDocumentsWithTolerance(original, modified);
+
+      // Exactly one 'modified' entry is expected, pairing the old and new second line.
+      const modifiedDiffs = diffs.filter((diff) => diff.type === 'modified');
+      expect(modifiedDiffs.length).toBe(1);
+      expect(modifiedDiffs[0].original).toBe('第二行');
+      expect(modifiedDiffs[0].modified).toBe('修改的第二行');
+    });
+
+    it('应该处理混合场景（换行差异 + 真实修改）', () => { // title: mixed scenario (line-break difference + real modification)
+      const original = `第一段内容，
+继续第二段。
+第三行 unchanged`;
+
+      const modified = `第一段内容，继续第二段。
+第三行已修改`;
+
+      const diffs = compareDocumentsWithTolerance(original, modified);
+
+      // Intent: detect both the tolerated line break and the real edit. NOTE(review): unchangedDiffs is computed but never asserted on.
+      const unchangedDiffs = diffs.filter((diff) => diff.type === 'unchanged');
+      const modifiedDiffs = diffs.filter((diff) => diff.type === 'modified');
+
+      expect(modifiedDiffs.length).toBe(1);
+      expect(modifiedDiffs[0].original).toContain('第三行 unchanged');
+      expect(modifiedDiffs[0].modified).toContain('第三行已修改');
+    });
+
+    it('应该处理增加和删除', () => { // title: should handle additions and removals
+      const original = `第一行
+第二行
+第三行`;
+
+      const modified = `新增的第一行
+第一行
+第三行`;
+
+      const diffs = compareDocumentsWithTolerance(original, modified);
+
+      const addedDiffs = diffs.filter((diff) => diff.type === 'added');
+      const removedDiffs = diffs.filter((diff) => diff.type === 'removed');
+      const unchangedDiffs = diffs.filter((diff) => diff.type === 'unchanged');
+
+      expect(addedDiffs.length).toBe(1); // one inserted line ...
+      expect(addedDiffs[0].modified).toBe('新增的第一行');
+      expect(removedDiffs.length).toBe(1); // ... and one deleted line
+      expect(removedDiffs[0].original).toBe('第二行');
+      expect(unchangedDiffs.length).toBeGreaterThan(0);
+    });
+
+    it('应该处理空文档情况', () => { // title: should handle empty documents
+      const diffs1 = compareDocumentsWithTolerance('', '新内容'); // empty original
+      const diffs2 = compareDocumentsWithTolerance('原始内容', ''); // empty modified
+
+      expect(diffs1.length).toBe(1);
+      expect(diffs1[0].type).toBe('added');
+      expect(diffs1[0].modified).toBe('新内容');
+
+      expect(diffs2.length).toBe(1);
+      expect(diffs2[0].type).toBe('removed');
+      expect(diffs2[0].original).toBe('原始内容');
+    });
+
+    it('应该处理相同文档', () => { // title: should handle identical documents
+      const text = `第一行
+第二行
+第三行`;
+
+      const diffs = compareDocumentsWithTolerance(text, text);
+
+      expect(diffs.length).toBe(3); // one entry per input line
+      expect(diffs.every((diff) => diff.type === 'unchanged')).toBe(true);
+    });
+
+    it('应该处理不同的扫描范围设置', () => { // title: should handle different scan-range settings
+      const original = `第一行
+第二行
+第三行
+第四行`;
+
+      const modified = `第一行第二行
+第三行
+第四行`;
+
+      // Smaller scan range.
+      const diffs1 = compareDocumentsWithTolerance(original, modified, {
+        enableLineBreakTolerance: true,
+        scanRange: 1,
+        toleranceThreshold: 0.95
+      });
+
+      // Larger scan range.
+      const diffs2 = compareDocumentsWithTolerance(original, modified, {
+        enableLineBreakTolerance: true,
+        scanRange: 5,
+        toleranceThreshold: 0.95
+      });
+
+      // Both settings should absorb the line-break difference. Note that `every` would also pass on an empty result.
+      expect(diffs1.every((diff) => diff.type === 'unchanged')).toBe(true);
+      expect(diffs2.every((diff) => diff.type === 'unchanged')).toBe(true);
+    });
+
+    it('应该处理不同的相似度阈值', () => { // title: should handle different similarity thresholds
+      const original = `文本行1
+文本行2`;
+
+      const modified = `文本行1文本行2`;
+
+      // High threshold.
+      const diffs1 = compareDocumentsWithTolerance(original, modified, {
+        enableLineBreakTolerance: true,
+        scanRange: 3,
+        toleranceThreshold: 0.99
+      });
+
+      // Low threshold.
+      const diffs2 = compareDocumentsWithTolerance(original, modified, {
+        enableLineBreakTolerance: true,
+        scanRange: 3,
+        toleranceThreshold: 0.8
+      });
+
+      // NOTE(review): a length is never negative, so both assertions always pass; they only show the calls complete. Intended threshold behaviour is not pinned.
+      expect(diffs1.length).toBeGreaterThanOrEqual(0);
+      expect(diffs2.length).toBeGreaterThanOrEqual(0);
+    });
+  });
+
+  describe('复杂场景测试', () => { // Complex scenarios: paragraph, table and code-like inputs passed to compareDocumentsWithTolerance
+    it('应该处理段落级别的换行差异', () => { // title: should handle paragraph-level line-break differences
+      const original = `这是第一段文本。
+内容比较长，被分成了多行。
+这是第二段文本。`;
+
+      const modified = `这是第一段文本。内容比较长，被分成了多行。
+这是第二段文本。`;
+
+      const diffs = compareDocumentsWithTolerance(original, modified);
+
+      // Intent: most content is reported as 'unchanged'. The assertion only requires at least one such entry.
+      const unchangedDiffs = diffs.filter((diff) => diff.type === 'unchanged');
+      expect(unchangedDiffs.length).toBeGreaterThan(0);
+    });
+
+    it('应该处理表格相关的换行差异', () => { // title: should handle table-related line-break differences
+      const original = `| 列1 | 列2 |
+|-----|-----|
+| 值1 | 值2 |`;
+
+      const modified = `| 列1 | 列2 |
+|-----|-----|
+| 值1 |
+值2 |`;
+
+      const diffs = compareDocumentsWithTolerance(original, modified);
+
+      // A table row broken across two lines should be handled; only a non-empty result is asserted.
+      expect(diffs.length).toBeGreaterThan(0);
+    });
+
+    it('应该处理代码块中的换行差异', () => { // title: should handle line-break differences inside code
+      const original = `function test() {
+  return true;
+}`;
+
+      const modified = `function test() { return true; }`;
+
+      const diffs = compareDocumentsWithTolerance(original, modified);
+
+      // NOTE(review): a length is never negative, so this assertion always passes; it only shows the call completes.
+      expect(diffs.length).toBeGreaterThanOrEqual(0);
+    });
+  });
+});