Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Parse] An implementation for SE-0182 #11080

Merged
merged 1 commit into from
Jul 25, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 40 additions & 2 deletions lib/Parse/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1189,6 +1189,27 @@ unsigned Lexer::lexUnicodeEscape(const char *&CurPtr, Lexer *Diags) {
return CharValue;
}

/// maybeConsumeNewlineEscape - Starting at CurPtr + Offset, skip any run of
/// spaces and tabs and test whether a line ending ("\n", "\r" or "\r\n")
/// follows. If so, set CurPtr to the first character after that line ending
/// and return true. For any other character, including NUL, return false and
/// leave CurPtr unmodified.
static bool maybeConsumeNewlineEscape(const char *&CurPtr, ssize_t Offset) {
  const char *Scan = CurPtr + Offset;

  // Horizontal whitespace between the escape and the line ending is ignored.
  while (*Scan == ' ' || *Scan == '\t')
    ++Scan;

  // Anything other than a line ending here means this is not a newline escape.
  const char LineEnd = *Scan;
  if (LineEnd != '\n' && LineEnd != '\r')
    return false;
  ++Scan;

  // Treat a CR LF pair as one line ending.
  if (LineEnd == '\r' && *Scan == '\n')
    ++Scan;

  CurPtr = Scan;
  return true;
}

/// lexCharacter - Read a character and return its UTF32 code. If this is the
/// end of enclosing string/character sequence (i.e. the character is equal to
Expand Down Expand Up @@ -1254,6 +1275,10 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote,
unsigned CharValue = 0;
// Escape processing. We already ate the "\".
switch (*CurPtr) {
case ' ': case '\t': case '\n': case '\r':
if (MultilineString && maybeConsumeNewlineEscape(CurPtr, 0))
return '\n';
LLVM_FALLTHROUGH;
default: // Invalid escape.
if (EmitDiagnostics)
diagnose(CurPtr, diag::lex_invalid_escape);
Expand Down Expand Up @@ -1380,7 +1405,11 @@ static const char *skipToEndOfInterpolatedExpression(const char *CurPtr,
// Entering a recursive interpolated expression
OpenDelimiters.push_back('(');
continue;
case '\n': case '\r': case 0:
case '\n': case '\r':
if (AllowNewline.back())
continue;
LLVM_FALLTHROUGH;
case 0:
// Don't jump over newline/EOF due to preceding backslash!
return CurPtr-1;
default:
Expand Down Expand Up @@ -1883,12 +1912,14 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
// we know that there is a terminating " character. Use BytesPtr to avoid a
// range check subscripting on the StringRef.
const char *BytesPtr = Bytes.begin();
bool IsEscapedNewline = false;
while (BytesPtr < Bytes.end()) {
char CurChar = *BytesPtr++;

// Multiline string line ending normalization and indent stripping.
if (CurChar == '\r' || CurChar == '\n') {
bool stripNewline = IsFirstSegment && BytesPtr - 1 == Bytes.begin();
bool stripNewline = IsEscapedNewline ||
(IsFirstSegment && BytesPtr - 1 == Bytes.begin());
if (CurChar == '\r' && *BytesPtr == '\n')
BytesPtr++;
if (*BytesPtr != '\r' && *BytesPtr != '\n')
Expand All @@ -1897,6 +1928,7 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
stripNewline = true;
if (!stripNewline)
TempString.push_back('\n');
IsEscapedNewline = false;
continue;
}

Expand All @@ -1921,6 +1953,12 @@ StringRef Lexer::getEncodedStringSegment(StringRef Bytes,
case '\'': TempString.push_back('\''); continue;
case '\\': TempString.push_back('\\'); continue;

case ' ': case '\t': case '\n': case '\r':
if (maybeConsumeNewlineEscape(BytesPtr, -1)) {
IsEscapedNewline = true;
BytesPtr--;
}
continue;

// String interpolation.
case '(':
Expand Down
6 changes: 6 additions & 0 deletions test/Parse/multiline_errors.swift
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,9 @@ _ = "hello\(
""")!"
// expected-error@-4 {{unterminated string literal}}
// expected-error@-2 {{unterminated string literal}}

_ = """
line one \ non-whitepace
line two
"""
// expected-error@-3 {{invalid escape sequence in literal}}
45 changes: 45 additions & 0 deletions test/Parse/multiline_string.swift
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,51 @@ _ = """
"""
// CHECK: "Twelve\nNu"

_ = """
newline \
elided
"""
// CHECK: "newline elided"

// contains trailing whitespace
_ = """
trailing \
\("""
substring1 \
\("""
substring2 \
substring3
""")\
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rationale for SE-182 states:

It also discussed whether \ should be allowed on the line immediately following the close """ and agreed that it was best to not allow it in this go-around.

This \ should be rejected.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like a mistake in the updated proposal. It should read “immediately preceding”.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not implemented as an error.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, sorry. I didn't read the PR comment.

""") \
whitepsace
"""
// CHECK: "trailing "
// CHECK: "substring1 "
// CHECK: "substring2 substring3"
// CHECK: " whitepsace"

// contains trailing whitespace
_ = """
foo\

bar
"""
// CHECK: "foo\nbar"

// contains trailing whitespace
_ = """
foo\

bar
"""
// CHECK: "foo\nbar"

_ = """
foo \
bar
"""
// CHECK: "foo bar"

_ = """

ABC
Expand Down