Skip to content

Commit 884cd74

Browse files
author
epriestley
committed
In prose diffs, use hash-and-diff for coarse "level 0" diffing to scale better
Summary: Depends on D20838. Fixes T13414. Instead of doing coarse diffing with "PhutilEditDistanceMatrix", use hash-and-diff with "DocumentEngine". Test Plan: - On a large document (~3K top level blocks), saw a more sensible diff, instead of the whole thing falling back to "everything changed" mode. - On a small document, still saw a sensible granular diff. {F6888249} Maniphest Tasks: T13414 Differential Revision: https://secure.phabricator.com/D20839
1 parent 9d884f1 commit 884cd74

File tree

4 files changed

+130
-48
lines changed

4 files changed

+130
-48
lines changed

src/__phutil_library_map__.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12402,7 +12402,7 @@
1240212402
'PhutilPHPCodeSnippetContextFreeGrammar' => 'PhutilCLikeCodeSnippetContextFreeGrammar',
1240312403
'PhutilPhabricatorAuthAdapter' => 'PhutilOAuthAuthAdapter',
1240412404
'PhutilProseDiff' => 'Phobject',
12405-
'PhutilProseDiffTestCase' => 'PhutilTestCase',
12405+
'PhutilProseDiffTestCase' => 'PhabricatorTestCase',
1240612406
'PhutilProseDifferenceEngine' => 'Phobject',
1240712407
'PhutilQueryString' => 'Phobject',
1240812408
'PhutilRealNameContextFreeGrammar' => 'PhutilContextFreeGrammar',

src/applications/files/diff/PhabricatorDocumentEngineBlocks.php

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,20 +50,35 @@ public function newTwoUpLayout() {
5050

5151
if ($old_line) {
5252
$old_hash = rtrim($old_line['text'], "\n");
53-
$old_block = array_shift($old_map[$old_hash]);
54-
$old_block->setDifferenceType($old_line['type']);
53+
if (!strlen($old_hash)) {
54+
// This can happen when one of the sources has no blocks.
55+
$old_block = null;
56+
} else {
57+
$old_block = array_shift($old_map[$old_hash]);
58+
$old_block->setDifferenceType($old_line['type']);
59+
}
5560
} else {
5661
$old_block = null;
5762
}
5863

5964
if ($new_line) {
6065
$new_hash = rtrim($new_line['text'], "\n");
61-
$new_block = array_shift($new_map[$new_hash]);
62-
$new_block->setDifferenceType($new_line['type']);
66+
if (!strlen($new_hash)) {
67+
$new_block = null;
68+
} else {
69+
$new_block = array_shift($new_map[$new_hash]);
70+
$new_block->setDifferenceType($new_line['type']);
71+
}
6372
} else {
6473
$new_block = null;
6574
}
6675

76+
// If both lists are empty, we may generate a row which has two empty
77+
// blocks.
78+
if (!$old_block && !$new_block) {
79+
continue;
80+
}
81+
6782
$rows[] = array(
6883
$old_block,
6984
$new_block,

src/infrastructure/diff/prose/PhutilProseDifferenceEngine.php

Lines changed: 108 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -10,47 +10,14 @@ private function buildDiff($u, $v, $level) {
1010
$u_parts = $this->splitCorpus($u, $level);
1111
$v_parts = $this->splitCorpus($v, $level);
1212

13-
$matrix = id(new PhutilEditDistanceMatrix())
14-
->setMaximumLength(128)
15-
->setSequences($u_parts, $v_parts)
16-
->setComputeString(true);
17-
18-
// For word-level and character-level changes, smooth the output string
19-
// to reduce the choppiness of the diff.
20-
if ($level > 1) {
21-
$matrix->setApplySmoothing(PhutilEditDistanceMatrix::SMOOTHING_FULL);
22-
}
23-
24-
$u_pos = 0;
25-
$v_pos = 0;
26-
27-
$edits = $matrix->getEditString();
28-
$edits_length = strlen($edits);
29-
30-
$diff = new PhutilProseDiff();
31-
for ($ii = 0; $ii < $edits_length; $ii++) {
32-
$c = $edits[$ii];
33-
if ($c == 's') {
34-
$diff->addPart('=', $u_parts[$u_pos]);
35-
$u_pos++;
36-
$v_pos++;
37-
} else if ($c == 'd') {
38-
$diff->addPart('-', $u_parts[$u_pos]);
39-
$u_pos++;
40-
} else if ($c == 'i') {
41-
$diff->addPart('+', $v_parts[$v_pos]);
42-
$v_pos++;
43-
} else if ($c == 'x') {
44-
$diff->addPart('-', $u_parts[$u_pos]);
45-
$diff->addPart('+', $v_parts[$v_pos]);
46-
$u_pos++;
47-
$v_pos++;
48-
} else {
49-
throw new Exception(
50-
pht(
51-
'Unexpected character ("%s") in edit string.',
52-
$c));
53-
}
13+
if ($level === 0) {
14+
$diff = $this->newHashDiff($u_parts, $v_parts);
15+
$too_large = false;
16+
} else {
17+
list($diff, $too_large) = $this->newEditDistanceMatrixDiff(
18+
$u_parts,
19+
$v_parts,
20+
$level);
5421
}
5522

5623
$diff->reorderParts();
@@ -119,7 +86,7 @@ private function buildDiff($u, $v, $level) {
11986
} else if (!strlen($new)) {
12087
$result->addPart('-', $old);
12188
} else {
122-
if ($matrix->didReachMaximumLength()) {
89+
if ($too_large) {
12390
// If this text was too big to diff, don't try to subdivide it.
12491
$result->addPart('-', $old);
12592
$result->addPart('+', $new);
@@ -206,4 +173,103 @@ private function stitchPieces(array $pieces, $level) {
206173
return $results;
207174
}
208175

176+
private function newEditDistanceMatrixDiff(
177+
array $u_parts,
178+
array $v_parts,
179+
$level) {
180+
181+
$matrix = id(new PhutilEditDistanceMatrix())
182+
->setMaximumLength(128)
183+
->setSequences($u_parts, $v_parts)
184+
->setComputeString(true);
185+
186+
// For word-level and character-level changes, smooth the output string
187+
// to reduce the choppiness of the diff.
188+
if ($level > 1) {
189+
$matrix->setApplySmoothing(PhutilEditDistanceMatrix::SMOOTHING_FULL);
190+
}
191+
192+
$u_pos = 0;
193+
$v_pos = 0;
194+
195+
$edits = $matrix->getEditString();
196+
$edits_length = strlen($edits);
197+
198+
$diff = new PhutilProseDiff();
199+
for ($ii = 0; $ii < $edits_length; $ii++) {
200+
$c = $edits[$ii];
201+
if ($c == 's') {
202+
$diff->addPart('=', $u_parts[$u_pos]);
203+
$u_pos++;
204+
$v_pos++;
205+
} else if ($c == 'd') {
206+
$diff->addPart('-', $u_parts[$u_pos]);
207+
$u_pos++;
208+
} else if ($c == 'i') {
209+
$diff->addPart('+', $v_parts[$v_pos]);
210+
$v_pos++;
211+
} else if ($c == 'x') {
212+
$diff->addPart('-', $u_parts[$u_pos]);
213+
$diff->addPart('+', $v_parts[$v_pos]);
214+
$u_pos++;
215+
$v_pos++;
216+
} else {
217+
throw new Exception(
218+
pht(
219+
'Unexpected character ("%s") in edit string.',
220+
$c));
221+
}
222+
}
223+
224+
return array($diff, $matrix->didReachMaximumLength());
225+
}
226+
227+
private function newHashDiff(array $u_parts, array $v_parts) {
228+
229+
$u_ref = new PhabricatorDocumentRef();
230+
$v_ref = new PhabricatorDocumentRef();
231+
232+
$u_blocks = $this->newDocumentEngineBlocks($u_parts);
233+
$v_blocks = $this->newDocumentEngineBlocks($v_parts);
234+
235+
$rows = id(new PhabricatorDocumentEngineBlocks())
236+
->addBlockList($u_ref, $u_blocks)
237+
->addBlockList($v_ref, $v_blocks)
238+
->newTwoUpLayout();
239+
240+
$diff = new PhutilProseDiff();
241+
foreach ($rows as $row) {
242+
list($u_block, $v_block) = $row;
243+
244+
if ($u_block && $v_block) {
245+
if ($u_block->getDifferenceType() === '-') {
246+
$diff->addPart('-', $u_block->getContent());
247+
$diff->addPart('+', $v_block->getContent());
248+
} else {
249+
$diff->addPart('=', $u_block->getContent());
250+
}
251+
} else if ($u_block) {
252+
$diff->addPart('-', $u_block->getContent());
253+
} else {
254+
$diff->addPart('+', $v_block->getContent());
255+
}
256+
}
257+
258+
return $diff;
259+
}
260+
261+
private function newDocumentEngineBlocks(array $parts) {
262+
$blocks = array();
263+
264+
foreach ($parts as $part) {
265+
$hash = PhabricatorHash::digestForIndex($part);
266+
267+
$blocks[] = id(new PhabricatorDocumentEngineBlock())
268+
->setContent($part)
269+
->setDifferenceHash($hash);
270+
}
271+
272+
return $blocks;
273+
}
274+
209275
}

src/infrastructure/diff/prose/__tests__/PhutilProseDiffTestCase.php

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
<?php
22

3-
final class PhutilProseDiffTestCase extends PhutilTestCase {
3+
final class PhutilProseDiffTestCase
4+
extends PhabricatorTestCase {
45

56
public function testProseDiffsDistance() {
67
$this->assertProseParts(

0 commit comments

Comments
 (0)