From 98a5bb2503cbafb2431ba46e9ef6b7d4bc7adfae Mon Sep 17 00:00:00 2001
From: kpdecker
Date: Wed, 6 May 2015 09:20:53 -0500
Subject: [PATCH] Only use ASCII new lines for patch tokenization

Fixes #57
---
Reviewer notes (commentary only; not part of the commit message or the diff):

* diff.js, hunk at 322: lastLineLastChar is now `lastLine && lastLine[...]`
  instead of a ternary with an '' fallback, so it is whatever falsy value
  lastLine holds when there is no previous line. The only use visible in
  this hunk compares it with '\r'.
* diff.js, hunk at 342: adds PatchDiff, a Diff instance whose tokenize()
  splits on "\n" or "\r\n" only (capturing split), drops the trailing empty
  piece when the last element is empty, and appends each separator
  (odd index) to the token before it, so every token is one line including
  its terminator.
* diff.js, hunk at 387: the diff is now computed with PatchDiff.diff()
  instead of LineDiff.diff(). The enclosing function is not visible in the
  hunk; the new test reaches it through diff.createPatch() - presumably
  that is the function being changed; confirm against diff.js.
* test/applyPatch.js: new case round-trips text containing U+2028 through
  createPatch()/applyPatch().
* NOTE(review): this copy of the patch had its line breaks collapsed.
  Line structure, indentation and the position of blank context lines were
  reconstructed from the hunk line counts - verify against the upstream
  commit before applying.

 diff.js            | 26 ++++++++++++++++++++++++--
 test/applyPatch.js |  8 ++++++++
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/diff.js b/diff.js
index 482a0a71..421854a1 100644
--- a/diff.js
+++ b/diff.js
@@ -322,7 +322,7 @@
     for (var i = 0; i < lines.length; i++) {
       var line = lines[i],
           lastLine = lines[i - 1],
-          lastLineLastChar = lastLine ? lastLine[lastLine.length - 1] : '';
+          lastLineLastChar = lastLine && lastLine[lastLine.length - 1];
 
       // Merge lines that may contain windows new lines
       if (line === '\n' && lastLineLastChar === '\r') {
@@ -342,6 +342,28 @@
 
     return retLines;
   };
+  var PatchDiff = new Diff();
+  PatchDiff.tokenize = function(value) {
+    var ret = [],
+        linesAndNewlines = value.split(/(\n|\r\n)/);
+
+    // Ignore the final empty token that occurs if the string ends with a new line
+    if (!linesAndNewlines[linesAndNewlines.length - 1]) {
+      linesAndNewlines.pop();
+    }
+
+    // Merge the content and line separators into single tokens
+    for (var i = 0; i < linesAndNewlines.length; i++) {
+      var line = linesAndNewlines[i];
+
+      if (i % 2) {
+        ret[ret.length - 1] += line;
+      } else {
+        ret.push(line);
+      }
+    }
+    return ret;
+  };
 
   var SentenceDiff = new Diff();
   SentenceDiff.tokenize = function(value) {
@@ -387,7 +409,7 @@
     ret.push('--- ' + oldFileName + (typeof oldHeader === 'undefined' ? '' : '\t' + oldHeader));
     ret.push('+++ ' + newFileName + (typeof newHeader === 'undefined' ? '' : '\t' + newHeader));
 
-    var diff = LineDiff.diff(oldStr, newStr);
+    var diff = PatchDiff.diff(oldStr, newStr);
     diff.push({value: '', lines: []}); // Append an empty value to make cleanup easier
 
     // Formats a given set of lines for printing as context lines in a patch
diff --git a/test/applyPatch.js b/test/applyPatch.js
index 304ded65..0ca02601 100644
--- a/test/applyPatch.js
+++ b/test/applyPatch.js
@@ -305,4 +305,12 @@ describe('#applyPatch', function() {
         + ' line5\n')
       .should.equal(false);
   });
+
+  it('should work with unicode newline characters', function() {
+    var oldtext = 'AAAAAAAAAAAAAAAA\n\n';
+    var newtext = 'AAAAAAAAAAAAAAAA\nBBBBBB' + String.fromCharCode(0x2028) + '\nCCCCCCCCCCCCCCCCCC\n\n';
+
+    var diffed = diff.createPatch('test', oldtext, newtext);
+    diff.applyPatch(oldtext, diffed).should.equal(newtext);
+  });
 });