Simplify and comment UnifiedEmitter logic

rstudio · Jun 30, 2011 · 456728d · 456728d
1 parent 87023a4
commit 456728d
Showing 1 changed file with 94 additions and 52 deletions.
diff --git a/src/gwt/src/org/rstudio/studio/client/workbench/views/vcs/diff/UnifiedEmitter.java b/src/gwt/src/org/rstudio/studio/client/workbench/views/vcs/diff/UnifiedEmitter.java
@@ -20,6 +20,22 @@
 import java.util.Iterator;
 import java.util.List;
 
+/**
+ * This class is used to subset an existing patch (the existing patch is
+ * modeled in DiffChunk and the subset of changes we want to keep is zero
+ * or more ArrayList&lt;Line&gt;).
+ *
+ * It works by using the existing patch to recreate the original data, then
+ * merging with the specific changes we want to keep.
+ *
+ * The output is in "Unified diff" format.
+ *
+ * You can also use this class to generate reverse selective patches (basically
+ * the same as above, but with the effect of undoing the patch on the changed
+ * file, rather than applying the patch to the original file) by simply
+ * reversing the DiffChunk (DiffChunk.reverse()) and lines (Line.reverseLines())
+ * before calling addDiffs().
+ */
 public class UnifiedEmitter
 {
    public UnifiedEmitter(String relPath)
@@ -48,35 +64,24 @@ public String createPatch()
 
       StringBuilder p = new StringBuilder();
 
+      // Write file header
       p.append("--- ").append(fileA_).append(EOL);
       p.append("+++ ").append(fileB_).append(EOL);
 
       for (DiffChunk chunk : chunks)
       {
-         p.append("@@ -")
-               .append(chunk.oldRowStart)
-               .append(',')
-               .append(chunk.oldRowCount)
-               .append(" +")
-               .append(chunk.newRowStart)
-               .append(',')
-               .append(chunk.newRowCount)
-               .append(" @@")
-               .append(EOL);
+         // Write chunk header: @@ -A,B +C,D @@
+         p.append("@@ -").append(chunk.oldRowStart).append(',').append(chunk.oldRowCount)
+               .append(" +").append(chunk.newRowStart).append(',').append(chunk.newRowCount)
+               .append(" @@").append(EOL);
 
          for (Line line : chunk.diffLines)
          {
             switch (line.getType())
             {
-               case Same:
-                  p.append(' ');
-                  break;
-               case Insertion:
-                  p.append('+');
-                  break;
-               case Deletion:
-                  p.append('-');
-                  break;
+               case Same:       p.append(' '); break;
+               case Insertion:  p.append('+'); break;
+               case Deletion:   p.append('-'); break;
                default:
                   throw new IllegalArgumentException();
             }
@@ -87,6 +92,14 @@ public String createPatch()
       return p.toString();
    }
 
+   /**
+    * Divide a list of sorted lines into DiffChunks.
+    *
+    * NOTE: If we cared about compact diffs we could detect long runs of
+    * unchanged lines and elide them, like diff tools usually do. (Currently
+    * we keep all the lines we're given, and only use discontinuities to
+    * break up into chunks.)
+    */
    private ArrayList<DiffChunk> toDiffChunks(ArrayList<Line> lines)
    {
       ArrayList<DiffChunk> chunks = new ArrayList<DiffChunk>();
@@ -96,19 +109,26 @@ private ArrayList<DiffChunk> toDiffChunks(ArrayList<Line> lines)
 
       int line = lines.get(0).getOldLine();
 
+      // The index of the earliest line that hasn't been put into a chunk yet
       int head = 0;
+
       for (int i = 1; i < lines.size(); i++)
       {
          if ((lines.get(i).getOldLine() - line) > 1)
          {
+            // There's a gap between this line and the previous line. Turn
+            // the previous contiguous run into a DiffChunk.
+
             List<Line> sublist = lines.subList(head, i);
             chunks.add(contiguousLinesToChunk(sublist));
+
+            // This line is now the start of a new contiguous run.
             head = i;
          }
          line = lines.get(i).getOldLine();
       }
 
-      // Add final chunk
+      // Add final contiguous run
       List<Line> sublist = lines.subList(head, lines.size());
       chunks.add(contiguousLinesToChunk(sublist));
 
@@ -127,27 +147,35 @@ private DiffChunk contiguousLinesToChunk(List<Line> sublist)
                            new ArrayList<Line>(sublist));
    }
 
+   /**
+    * Does the heavy lifting: merges the context lines and diff lines in order.
+    */
    private ArrayList<Line> generateOutputLines()
    {
+      // Clean up contextLines_ so it only contains lines that are part of
+      // the original document.
       for (int i = 0; i < contextLines_.size(); i++)
          if (contextLines_.get(i).getType() == Type.Insertion)
-         {
-            contextLines_.remove(i);
-            i--;
-         }
+            contextLines_.remove(i--);
+
+      // Clean up diffLines_ so it only contains lines that represent actual
+      // changes. If we don't do this then the merge logic gets very confusing!
       for (int i = 0; i < diffLines_.size(); i++)
          if (diffLines_.get(i).getType() == Type.Same)
-         {
-            diffLines_.remove(i);
-            i--;
-         }
+            diffLines_.remove(i--);
 
+      // Check to see if maybe there's nothing to do
       if (diffLines_.size() == 0)
          return new ArrayList<Line>();
 
+      // It's quite possible that the same DiffChunk was added multiple times.
+      // (Overlapping DiffChunks are less likely--maybe impossible--but the
+      // sort and dedupe below also handles them, as long as those DiffChunks
+      // contain consistent data.)
       Collections.sort(contextLines_);
       DuplicateHelper.dedupeSortedList(contextLines_);
 
+      // Sort and dedupe the diff lines as well.
       Collections.sort(diffLines_);
       DuplicateHelper.dedupeSortedList(diffLines_);
 
@@ -158,46 +186,63 @@ private ArrayList<Line> generateOutputLines()
       Line ctx = ctxit.hasNext() ? ctxit.next() : null;
       Line dff = dffit.hasNext() ? dffit.next() : null;
 
+      /*
+       * Now we have two ordered iterators, one for the context (original
+       * document) and one for the diffs we want to apply to it. We want to
+       * merge them together into the output ArrayList in the proper order,
+       * being careful to throw out any context lines that are made obsolete
+       * by the diff lines.
+       */
+
+      // Tracks the amount that the "new" line numbers are offset from the "old"
+      // line numbers. new = old + skew
       int skew = 0;
-      int lastKnownOldLine = 0;
 
       // Do this while loop while both iterators still have elements
       while (ctx != null && dff != null)
       {
+         // Now we have a context line (ctx) and a diff line (dff) in hand.
+
          int cmp = ctx.getOldLine() - dff.getOldLine();
-         if (cmp < 0 || (cmp == 0 && !ctx.equals(dff)))
+         if (cmp == 0 && ctx.equals(dff))
          {
-            if (ctx.getOldLine() > lastKnownOldLine)
-            {
-               processContextLine(output, ctx, skew);
-               lastKnownOldLine = ctx.getOldLine();
-            }
+            /*
+             * ctx and dff are identical. And since we dropped Insertions from
+             * contextLines_ and Sames from diffLines_, we know they're
+             * Deletions. The dff takes precedence; we need to discard ctx so
+             * the line actually gets deleted.
+             */
+            ctx = ctxit.hasNext() ? ctxit.next() : null;
+            continue;
+         }
+
+         // In the case where cmp == 0, the oldLine properties were equal but
+         // the newLine properties were not. This means the diff is an
+         // insertion. We let the ctx line go first so the insertion happens
+         // in the right place.
+         if (cmp <= 0)
+         {
+            processContextLine(output, ctx, skew);
             ctx = ctxit.hasNext() ? ctxit.next() : null;
          }
          else
          {
             skew = processDiffLine(output, dff, skew);
-            lastKnownOldLine = dff.getOldLine();
             dff = dffit.hasNext() ? dffit.next() : null;
          }
       }
 
       // Finish off the context iterator if necessary
       while (ctx != null)
       {
-         if (ctx.getOldLine() > lastKnownOldLine)
-         {
-            processContextLine(output, ctx, skew);
-            lastKnownOldLine = ctx.getOldLine();
-         }
+         processContextLine(output, ctx, skew);
          ctx = ctxit.hasNext() ? ctxit.next() : null;
       }
 
       // Finish off the diff iterator if necessary
       while (dff != null)
       {
          skew = processDiffLine(output, dff, skew);
-         //lastKnownOldLine = dff.getOldLine(); // no longer necessary
          dff = dffit.hasNext() ? dffit.next() : null;
       }
 
@@ -221,23 +266,16 @@ private void processContextLine(ArrayList<Line> output, Line ctx, int skew)
                                 ctx.getOldLine() + skew,
                                 ctx.getText()));
             break;
-         case Insertion:
-            // This is a line that, in the original diff, was inserted into
-            // orig. But since we're processing it as context, we ignore the
-            // insertion, and let the line drop on the floor.
-            break;
+         default:
+            assert false : "Unexpected context line type";
+            throw new IllegalStateException();
       }
    }
 
    private int processDiffLine(ArrayList<Line> output, Line dff, int skew)
    {
       switch (dff.getType())
       {
-         case Same:
-            output.add(new Line(Type.Same, dff.getOldLine(),
-                                dff.getOldLine() + skew,
-                                dff.getText()));
-            break;
          case Deletion:
             output.add(new Line(Type.Deletion, dff.getOldLine(),
                                 dff.getOldLine() + skew,
@@ -249,6 +287,10 @@ private int processDiffLine(ArrayList<Line> output, Line dff, int skew)
                                 dff.getOldLine() + skew,
                                 dff.getText()));
             skew++;
+            break;
+         default:
+            assert false : "Unexpected diff line type";
+            throw new IllegalStateException();
       }
       return skew;
    }