Skip to content

Commit

Permalink
Simplify and comment UnifiedEmitter logic
Browse files Browse the repository at this point in the history
  • Loading branch information
jcheng5 committed Jun 30, 2011
1 parent 87023a4 commit 456728d
Showing 1 changed file with 94 additions and 52 deletions.
Expand Up @@ -20,6 +20,22 @@
import java.util.Iterator;
import java.util.List;

/**
* This class is used to subset an existing patch (the existing patch is
* modeled in DiffChunk and the subset of changes we want to keep is zero
* or more {@code ArrayList<Line>}).
*
* It works by using the existing patch to recreate the original data, then
* merging with the specific changes we want to keep.
*
* The output is in "Unified diff" format.
*
* You can also use this class to generate reverse selective patches (basically
* the same as above, but with the effect of undoing the patch on the changed
* file, rather than applying the patch to the original file) by simply
* reversing the DiffChunk (DiffChunk.reverse()) and lines (Line.reverseLines())
* before calling addDiffs().
*/
public class UnifiedEmitter
{
public UnifiedEmitter(String relPath)
Expand Down Expand Up @@ -48,35 +64,24 @@ public String createPatch()

StringBuilder p = new StringBuilder();

// Write file header
p.append("--- ").append(fileA_).append(EOL);
p.append("+++ ").append(fileB_).append(EOL);

for (DiffChunk chunk : chunks)
{
p.append("@@ -")
.append(chunk.oldRowStart)
.append(',')
.append(chunk.oldRowCount)
.append(" +")
.append(chunk.newRowStart)
.append(',')
.append(chunk.newRowCount)
.append(" @@")
.append(EOL);
// Write chunk header: @@ -A,B +C,D @@
p.append("@@ -").append(chunk.oldRowStart).append(',').append(chunk.oldRowCount)
.append(" +").append(chunk.newRowStart).append(',').append(chunk.newRowCount)
.append(" @@").append(EOL);

for (Line line : chunk.diffLines)
{
switch (line.getType())
{
case Same:
p.append(' ');
break;
case Insertion:
p.append('+');
break;
case Deletion:
p.append('-');
break;
case Same: p.append(' '); break;
case Insertion: p.append('+'); break;
case Deletion: p.append('-'); break;
default:
throw new IllegalArgumentException();
}
Expand All @@ -87,6 +92,14 @@ public String createPatch()
return p.toString();
}

/**
* Divide a list of sorted lines into DiffChunks.
*
* NOTE: If we cared about compact diffs we could detect long runs of
* unchanged lines and elide them, like diff tools usually do. (Currently
* we keep all the lines we're given, and only use discontinuities to
* break up into chunks.)
*/
private ArrayList<DiffChunk> toDiffChunks(ArrayList<Line> lines)
{
ArrayList<DiffChunk> chunks = new ArrayList<DiffChunk>();
Expand All @@ -96,19 +109,26 @@ private ArrayList<DiffChunk> toDiffChunks(ArrayList<Line> lines)

int line = lines.get(0).getOldLine();

// The index of the earliest line that hasn't been put into a chunk yet
int head = 0;

for (int i = 1; i < lines.size(); i++)
{
if ((lines.get(i).getOldLine() - line) > 1)
{
// There's a gap between this line and the previous line. Turn
// the previous contiguous run into a DiffChunk.

List<Line> sublist = lines.subList(head, i);
chunks.add(contiguousLinesToChunk(sublist));

// This line is now the start of a new contiguous run.
head = i;
}
line = lines.get(i).getOldLine();
}

// Add final chunk
// Add final contiguous run
List<Line> sublist = lines.subList(head, lines.size());
chunks.add(contiguousLinesToChunk(sublist));

Expand All @@ -127,27 +147,35 @@ private DiffChunk contiguousLinesToChunk(List<Line> sublist)
new ArrayList<Line>(sublist));
}

/**
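* Here is where the heavy lifting is done: contextLines_ (the original
* document) and diffLines_ (the changes we want to keep) are merged into
* a single ordered list of output lines.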
*/
private ArrayList<Line> generateOutputLines()
{
// Clean up contextLines_ so it only contains lines that are part of
// the original document.
for (int i = 0; i < contextLines_.size(); i++)
if (contextLines_.get(i).getType() == Type.Insertion)
{
contextLines_.remove(i);
i--;
}
contextLines_.remove(i--);

// Clean up diffLines_ so it only contains lines that represent actual
// changes. If we don't do this then the merge logic gets very confusing!
for (int i = 0; i < diffLines_.size(); i++)
if (diffLines_.get(i).getType() == Type.Same)
{
diffLines_.remove(i);
i--;
}
diffLines_.remove(i--);

// Check to see if maybe there's nothing to do
if (diffLines_.size() == 0)
return new ArrayList<Line>();

// It's quite possible that the same DiffChunk was added multiple times.
// (Less likely--maybe impossible--is for overlapping DiffChunks to be
// added, but that would be dealt with by this as long as those DiffChunks
// contain consistent data.)
Collections.sort(contextLines_);
DuplicateHelper.dedupeSortedList(contextLines_);

// Clean up all the diff lines as well.
Collections.sort(diffLines_);
DuplicateHelper.dedupeSortedList(diffLines_);

Expand All @@ -158,46 +186,63 @@ private ArrayList<Line> generateOutputLines()
Line ctx = ctxit.hasNext() ? ctxit.next() : null;
Line dff = dffit.hasNext() ? dffit.next() : null;

/**
* Now we have two ordered iterators, one for the context (original
* document) and one for the diffs we want to apply to it. We want to
* merge them together into the output ArrayList in the proper order,
* being careful to throw out any context lines that are made obsolete
* by the diff lines.
*/

// Tracks the amount that the "new" line numbers are offset from the "old"
// line numbers. new = old + skew
int skew = 0;
int lastKnownOldLine = 0;

// Keep merging for as long as both iterators still have elements
while (ctx != null && dff != null)
{
// Now we have a context line (ctx) and a diff line (dff) in hand.

int cmp = ctx.getOldLine() - dff.getOldLine();
if (cmp < 0 || (cmp == 0 && !ctx.equals(dff)))
if (cmp == 0 && ctx.equals(dff))
{
if (ctx.getOldLine() > lastKnownOldLine)
{
processContextLine(output, ctx, skew);
lastKnownOldLine = ctx.getOldLine();
}
/**
* ctx and dff are identical. And since we dropped Insertions from
* contextLines_ and Sames from diffLines_, we know they're
* Deletions. The dff takes precedence; we need to discard ctx so
* the line actually gets deleted.
*/
ctx = ctxit.hasNext() ? ctxit.next() : null;
continue;
}

// In the case where cmp == 0, the oldLine properties were equal but
// the newLine properties were not. This means the diff is an
// insertion. We let the ctx line go first so the insertion happens
// in the right place.
if (cmp <= 0)
{
processContextLine(output, ctx, skew);
ctx = ctxit.hasNext() ? ctxit.next() : null;
}
else
{
skew = processDiffLine(output, dff, skew);
lastKnownOldLine = dff.getOldLine();
dff = dffit.hasNext() ? dffit.next() : null;
}
}

// Finish off the context iterator if necessary
while (ctx != null)
{
if (ctx.getOldLine() > lastKnownOldLine)
{
processContextLine(output, ctx, skew);
lastKnownOldLine = ctx.getOldLine();
}
processContextLine(output, ctx, skew);
ctx = ctxit.hasNext() ? ctxit.next() : null;
}

// Finish off the diff iterator if necessary
while (dff != null)
{
skew = processDiffLine(output, dff, skew);
//lastKnownOldLine = dff.getOldLine(); // no longer necessary
dff = dffit.hasNext() ? dffit.next() : null;
}

Expand All @@ -221,23 +266,16 @@ private void processContextLine(ArrayList<Line> output, Line ctx, int skew)
ctx.getOldLine() + skew,
ctx.getText()));
break;
case Insertion:
// This is a line that, in the original diff, was inserted into
// orig. But since we're processing it as context, we ignore the
// insertion, and let the line drop on the floor.
break;
default:
assert false : "Unexpected context line type";
throw new IllegalStateException();
}
}

private int processDiffLine(ArrayList<Line> output, Line dff, int skew)
{
switch (dff.getType())
{
case Same:
output.add(new Line(Type.Same, dff.getOldLine(),
dff.getOldLine() + skew,
dff.getText()));
break;
case Deletion:
output.add(new Line(Type.Deletion, dff.getOldLine(),
dff.getOldLine() + skew,
Expand All @@ -249,6 +287,10 @@ private int processDiffLine(ArrayList<Line> output, Line dff, int skew)
dff.getOldLine() + skew,
dff.getText()));
skew++;
break;
default:
assert false : "Unexpected diff line type";
throw new IllegalStateException();
}
return skew;
}
Expand Down

0 comments on commit 456728d

Please sign in to comment.