Skip to content

Commit

Permalink
#88: Fixed regression in handling of CR/LF combinations within strings. Added test case to prevent such a regression in the future.
Browse files Browse the repository at this point in the history
  • Loading branch information
msfterictraut committed Apr 25, 2019
1 parent 4df9dec commit 9d2734b
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 1 deletion.
2 changes: 1 addition & 1 deletion server/src/parser/tokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -971,7 +971,7 @@ export class Tokenizer {
}

// Skip over the escaped new line (either one or two characters).
if (this._cs.currentChar === Char.LineFeed && this._cs.nextChar === Char.CarriageReturn) {
if (this._cs.currentChar === Char.CarriageReturn && this._cs.nextChar === Char.LineFeed) {
unescapedValue += String.fromCharCode(this._cs.currentChar);
this._cs.moveNext();
}
Expand Down
14 changes: 14 additions & 0 deletions server/src/tests/samples/lines1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# This sample tests that the tokenizer properly handles escaped
# line feeds in both LF-only and CR/LF files.

"""
This is a multi-line comment \
with escape characters.
"""

# This is a raw string literal containing an escaped EOL.
foo = r"\
"

# The final token must land on line 14; tokenizer.test.ts asserts this.
bar = foo
19 changes: 19 additions & 0 deletions server/src/tests/tokenizer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

import * as assert from 'assert';

import { TestUtils } from './testUtils';

import { Tokenizer } from '../parser/tokenizer';
import { DedentToken, IdentifierToken, IndentToken, NewLineToken, NewLineType,
NumberToken, OperatorToken, OperatorType, StringToken,
Expand Down Expand Up @@ -998,3 +1000,20 @@ test('Identifiers', () => {
assert.equal(results.tokens.getItemAt(3).type, TokenType.Identifier);
assert.equal(results.tokens.getItemAt(3).length, 5);
});

test('Lines1', () => {
const sampleText = TestUtils.readSampleFile('lines1.py');
const t = new Tokenizer();

// Start with the line feed only. We don't know whether the
// sample file was stored with CR/LF or just LF, so do
// the replacement here.
const sampleTextLfOnly = sampleText.replace(/\r\n/g, '\n');
const resultsLf = t.tokenize(sampleTextLfOnly);
assert.equal(resultsLf.lines.count, 14);

// Now replace the LF with CR/LF sequences.
const sampleTextCrLf = sampleTextLfOnly.replace(/\n/g, '\r\n');
const resultsCrLf = t.tokenize(sampleTextCrLf);
assert.equal(resultsCrLf.lines.count, 14);
});

0 comments on commit 9d2734b

Please sign in to comment.