diff --git a/include/swift/AST/DiagnosticsParse.def b/include/swift/AST/DiagnosticsParse.def index a8056baeb0c1d..7f1723875f32a 100644 --- a/include/swift/AST/DiagnosticsParse.def +++ b/include/swift/AST/DiagnosticsParse.def @@ -94,6 +94,9 @@ ERROR(forbidden_extended_escaping_string,none, ERROR(regex_literal_parsing_error,none, "%0", (StringRef)) +ERROR(prefix_slash_not_allowed,none, + "prefix slash not allowed", ()) + //------------------------------------------------------------------------------ // MARK: Lexer diagnostics //------------------------------------------------------------------------------ diff --git a/include/swift/Parse/Lexer.h b/include/swift/Parse/Lexer.h index ceda74e5a630d..5c695d920e33c 100644 --- a/include/swift/Parse/Lexer.h +++ b/include/swift/Parse/Lexer.h @@ -531,6 +531,9 @@ class Lexer { void operator=(const SILBodyRAII&) = delete; }; + /// Attempt to re-lex a regex literal with forward slashes `/.../`. + bool tryLexAsForwardSlashRegexLiteral(State S); + private: /// Nul character meaning kind. enum class NulCharacterKind { diff --git a/include/swift/Parse/Parser.h b/include/swift/Parse/Parser.h index 660cde62a0d9e..363a3fce5eaca 100644 --- a/include/swift/Parse/Parser.h +++ b/include/swift/Parse/Parser.h @@ -559,6 +559,11 @@ class Parser { return f(backtrackScope); } + /// Discard the current token. This will avoid interface hashing or updating + /// the previous loc. Use this only if you've completely re-lexed + /// a different token at that position. + SourceLoc discardToken(); + /// Consume a token that we created on the fly to correct the original token /// stream from lexer. 
void consumeExtraToken(Token K); @@ -1723,6 +1728,7 @@ class Parser { ParserResult parseExprPoundCodeCompletion(Optional ParentKind); + UnresolvedDeclRefExpr *makeExprOperator(Token opToken); UnresolvedDeclRefExpr *parseExprOperator(); void validateCollectionElement(ParserResult element); diff --git a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp index 96a3ef892168f..b82397827e76e 100644 --- a/lib/Parse/Lexer.cpp +++ b/lib/Parse/Lexer.cpp @@ -1997,6 +1997,62 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) { return true; } +bool Lexer::tryLexAsForwardSlashRegexLiteral(State S) { + auto priorState = getStateForBeginningOfToken(NextToken, LeadingTrivia); + + // Re-lex from the given state. + restoreState(S); + + // While we restored state, that would have re-advanced the lexer. This is + // good in that it's filled in all the interesting properties of the token + // (trivia, is on new line, etc), but we need to rewind to re-lex the actual + // kind. + auto *TokStart = getBufferPtrForSourceLoc(NextToken.getLoc()); + assert(*TokStart == '/'); + CurPtr = TokStart + 1; + + auto bail = [&]() -> bool { + restoreState(priorState); + return false; + }; + + // We need to ban these characters at the start of a regex to avoid ambiguity + // with unapplied operator references, e.g. 'foo(/, /)'. + switch (*CurPtr) { + case ' ': case '\t': case ',': case ')': + return bail(); + default: + break; + } + + while (true) { + uint32_t CharValue = validateUTF8CharacterAndAdvance(CurPtr, BufferEnd); + if (CharValue == ~0U) + return bail(); + + // Regex literals cannot span multiple lines. + if (CharValue == '\n' || CharValue == '\r') + return bail(); + + if (CharValue == '\\' && (*CurPtr == '/' || *CurPtr == '\\')) { + // Skip an escaped '/' or escaped backslash, so '\\/' still ends the literal. + CurPtr++; + } else if (CharValue == '/') { + // End of literal, stop. + break; + } + } + // We've ended on the opening of a comment, bail. 
+ // TODO: We could treat such cases as postfix operators on a regex literal, + // but it seems more likely the user has written a comment and is in the + // middle of editing the text before it. + if (*CurPtr == '*' || *CurPtr == '/') + return bail(); + + formToken(tok::regex_literal, TokStart); + return true; +} + /// lexEscapedIdentifier: /// identifier ::= '`' identifier '`' /// diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp index 14799a3e26414..cae6cf7564ea3 100644 --- a/lib/Parse/ParseDecl.cpp +++ b/lib/Parse/ParseDecl.cpp @@ -8524,6 +8524,11 @@ Parser::parseDeclOperator(ParseDeclOptions Flags, DeclAttributes &Attributes) { Tok.getRawText().front() == '!')) diagnose(Tok, diag::postfix_operator_name_cannot_start_with_unwrap); + if (Attributes.hasAttribute()) { + if (Tok.getText().contains("/")) + diagnose(Tok, diag::prefix_slash_not_allowed); + } + // A common error is to try to define an operator with something in the // unicode plane considered to be an operator, or to try to define an // operator like "not". Analyze and diagnose this specifically. diff --git a/lib/Parse/ParseExpr.cpp b/lib/Parse/ParseExpr.cpp index 07d98a96c8611..af94c2a1f5589 100644 --- a/lib/Parse/ParseExpr.cpp +++ b/lib/Parse/ParseExpr.cpp @@ -511,6 +511,31 @@ ParserResult Parser::parseExprSequenceElement(Diag<> message, ParserResult Parser::parseExprUnary(Diag<> Message, bool isExprBasic) { SyntaxParsingContext UnaryContext(SyntaxContext, SyntaxContextKind::Expr); UnresolvedDeclRefExpr *Operator; + + // First check to see if we have the start of a regex literal /.../. + switch (Tok.getKind()) { + case tok::oper_prefix: + case tok::oper_binary_spaced: + case tok::oper_binary_unspaced: { + if (!Tok.getText().startswith("/")) + break; + + // Try re-lex as a /.../ regex literal. + if (!L->tryLexAsForwardSlashRegexLiteral(getParserPosition().LS)) + break; + + // Discard the operator token, which will be replaced by the regex literal + // token. 
+ discardToken(); + + assert(Tok.getText().startswith("/")); + assert(Tok.is(tok::regex_literal)); + break; + } + default: + break; + } + switch (Tok.getKind()) { default: // If the next token is not an operator, just parse this as expr-postfix. @@ -532,16 +557,31 @@ ParserResult Parser::parseExprUnary(Diag<> Message, bool isExprBasic) { case tok::backslash: return parseExprKeyPath(); - case tok::oper_postfix: + case tok::oper_postfix: { // Postfix operators cannot start a subexpression, but can happen // syntactically because the operator may just follow whatever precedes this // expression (and that may not always be an expression). diagnose(Tok, diag::invalid_postfix_operator); Tok.setKind(tok::oper_prefix); - LLVM_FALLTHROUGH; - case tok::oper_prefix: Operator = parseExprOperator(); break; + } + case tok::oper_prefix: { + // Check to see if we can split a prefix operator containing '/', e.g. '!/', + // which might be a prefix operator on a regex literal. + auto slashIdx = Tok.getText().find("/"); + if (slashIdx != StringRef::npos) { + auto prefix = Tok.getText().take_front(slashIdx); + if (!prefix.empty()) { + Operator = makeExprOperator({Tok.getKind(), prefix}); + consumeStartingCharacterOfCurrentToken(Tok.getKind(), prefix.size()); + break; + } + diagnose(Tok, diag::prefix_slash_not_allowed); + } + Operator = parseExprOperator(); + break; + } case tok::oper_binary_spaced: case tok::oper_binary_unspaced: { // For recovery purposes, accept an oper_binary here. @@ -860,19 +900,24 @@ static DeclRefKind getDeclRefKindForOperator(tok kind) { } } -/// parseExprOperator - Parse an operator reference expression. These -/// are not "proper" expressions; they can only appear in binary/unary -/// operators. 
-UnresolvedDeclRefExpr *Parser::parseExprOperator() { +UnresolvedDeclRefExpr *Parser::makeExprOperator(Token Tok) { assert(Tok.isAnyOperator()); DeclRefKind refKind = getDeclRefKindForOperator(Tok.getKind()); SourceLoc loc = Tok.getLoc(); DeclNameRef name(Context.getIdentifier(Tok.getText())); - consumeToken(); // Bypass local lookup. return new (Context) UnresolvedDeclRefExpr(name, refKind, DeclNameLoc(loc)); } +/// parseExprOperator - Parse an operator reference expression. These +/// are not "proper" expressions; they can only appear in binary/unary +/// operators. +UnresolvedDeclRefExpr *Parser::parseExprOperator() { + auto *op = makeExprOperator(Tok); + consumeToken(); + return op; +} + /// parseExprSuper /// /// expr-super: diff --git a/lib/Parse/ParseRegex.cpp b/lib/Parse/ParseRegex.cpp index 802d0c25c70eb..2e22ded577fbf 100644 --- a/lib/Parse/ParseRegex.cpp +++ b/lib/Parse/ParseRegex.cpp @@ -32,13 +32,31 @@ using namespace swift::syntax; ParserResult Parser::parseExprRegexLiteral() { assert(Tok.is(tok::regex_literal)); - assert(regexLiteralParsingFn); + + // Bail if '-enable-experimental-string-processing' is not enabled. + if (!Context.LangOpts.EnableExperimentalStringProcessing || + !regexLiteralParsingFn) { + diagnose(Tok, diag::regex_literal_parsing_error, + "regex literal requires '-enable-experimental-string-processing'"); + auto loc = consumeToken(); + return makeParserResult(new (Context) ErrorExpr(loc)); + } SyntaxParsingContext LocalContext(SyntaxContext, SyntaxKind::RegexLiteralExpr); auto regexText = Tok.getText(); + // The Swift library doesn't know about `/.../` regexes, so pretend it's + // `#/.../#` instead. + if (regexText[0] == '/') { + SmallString<32> scratch; + scratch.append("#"); + scratch.append(regexText); + scratch.append("#"); + regexText = Context.AllocateCopy(StringRef(scratch)); + } + // Let the Swift library parse the contents, returning an error, or null if // successful. 
// TODO: We need to be able to pass back a source location to emit the error diff --git a/lib/Parse/Parser.cpp b/lib/Parse/Parser.cpp index 708a3858e8d40..077bb332c9aff 100644 --- a/lib/Parse/Parser.cpp +++ b/lib/Parse/Parser.cpp @@ -579,13 +579,16 @@ const Token &Parser::peekToken() { return L->peekNextToken(); } -SourceLoc Parser::consumeTokenWithoutFeedingReceiver() { - SourceLoc Loc = Tok.getLoc(); +SourceLoc Parser::discardToken() { assert(Tok.isNot(tok::eof) && "Lexing past eof!"); + SourceLoc Loc = Tok.getLoc(); + L->lex(Tok, LeadingTrivia, TrailingTrivia); + return Loc; +} +SourceLoc Parser::consumeTokenWithoutFeedingReceiver() { recordTokenHash(Tok); - - L->lex(Tok, LeadingTrivia, TrailingTrivia); + auto Loc = discardToken(); PreviousLoc = Loc; return Loc; } diff --git a/test/StringProcessing/Parse/forward-slash-regex-default.swift b/test/StringProcessing/Parse/forward-slash-regex-default.swift new file mode 100644 index 0000000000000..3fccb6802cdde --- /dev/null +++ b/test/StringProcessing/Parse/forward-slash-regex-default.swift @@ -0,0 +1,13 @@ +// RUN: %target-typecheck-verify-swift + +let _ = /x/ // expected-error {{regex literal requires '-enable-experimental-string-processing'}} + +prefix operator / // expected-error {{prefix slash not allowed}} +prefix operator ^/ // expected-error {{prefix slash not allowed}} + +_ = /x +// expected-error@-1 {{prefix slash not allowed}} +// expected-error@-2 {{'/' is not a prefix unary operator}} +// expected-error@-3 {{cannot find 'x' in scope}} + +_ = !/x/ // expected-error {{regex literal requires '-enable-experimental-string-processing'}} diff --git a/test/StringProcessing/Parse/forward-slash-regex.swift b/test/StringProcessing/Parse/forward-slash-regex.swift new file mode 100644 index 0000000000000..f9749de5ba601 --- /dev/null +++ b/test/StringProcessing/Parse/forward-slash-regex.swift @@ -0,0 +1,283 @@ +// RUN: %target-typecheck-verify-swift -enable-experimental-string-processing +// REQUIRES: 
swift_in_compiler +// REQUIRES: concurrency + +precedencegroup P { + associativity: left +} + +// Fine. +infix operator /^/ : P +func /^/ (lhs: Int, rhs: Int) -> Int { 0 } + +let i = 0 /^/ 1/^/3 + +prefix operator / // expected-error {{prefix slash not allowed}} +prefix operator ^/ // expected-error {{prefix slash not allowed}} + +let x = /abc/ +_ = /abc/ +_ = /x/.self +_ = /\// + +// These unfortunately become infix `=/`. We could likely improve the diagnostic +// though. +let z=/0/ +// expected-error@-1 {{type annotation missing in pattern}} +// expected-error@-2 {{consecutive statements on a line must be separated by ';'}} +// expected-error@-3 {{expected expression after unary operator}} +// expected-error@-4 {{cannot find operator '=/' in scope}} +// expected-error@-5 {{'/' is not a postfix unary operator}} +_=/0/ +// expected-error@-1 {{'_' can only appear in a pattern or on the left side of an assignment}} +// expected-error@-2 {{cannot find operator '=/' in scope}} +// expected-error@-3 {{'/' is not a postfix unary operator}} + +_ = /x +// expected-error@-1 {{prefix slash not allowed}} +// expected-error@-2 {{'/' is not a prefix unary operator}} + +_ = !/x/ +// expected-error@-1 {{cannot convert value of type 'Regex' to expected argument type 'Bool'}} + +_ = /x/! 
// expected-error {{cannot force unwrap value of non-optional type 'Regex'}} +_ = /x/ + /y/ // expected-error {{binary operator '+' cannot be applied to two 'Regex' operands}} + +_ = /x/+/y/ +// expected-error@-1 {{cannot find operator '+/' in scope}} +// expected-error@-2 {{'/' is not a postfix unary operator}} +// expected-error@-3 {{cannot find 'y' in scope}} + +_ = /x/?.blah +// expected-error@-1 {{cannot use optional chaining on non-optional value of type 'Regex'}} +// expected-error@-2 {{value of type 'Regex' has no member 'blah'}} +_ = /x/!.blah +// expected-error@-1 {{cannot force unwrap value of non-optional type 'Regex'}} +// expected-error@-2 {{value of type 'Regex' has no member 'blah'}} + +_ = /x /? // expected-error {{cannot use optional chaining on non-optional value of type 'Regex'}} + .blah // expected-error {{value of type 'Regex' has no member 'blah'}} + +_ = 0; /x / // expected-warning {{regular expression literal is unused}} + +_ = /x / ? 0 : 1 // expected-error {{cannot convert value of type 'Regex' to expected condition type 'Bool'}} +_ = .random() ? /x / : .blah // expected-error {{type 'Regex' has no member 'blah'}} + +_ = /x/ ?? /x/ // expected-warning {{left side of nil coalescing operator '??' has non-optional type 'Regex', so the right side is never used}} +_ = /x / ?? /x / // expected-warning {{left side of nil coalescing operator '??' has non-optional type 'Regex', so the right side is never used}} + +_ = /x/??/x/ // expected-error {{'/' is not a postfix unary operator}} + +_ = /x/ ... /y/ // expected-error {{referencing operator function '...' on 'Comparable' requires that 'Regex' conform to 'Comparable'}} + +_ = /x/.../y/ +// expected-error@-1 {{missing whitespace between '...' and '/' operators}} +// expected-error@-2 {{'/' is not a postfix unary operator}} +// expected-error@-3 {{cannot find 'y' in scope}} + +_ = /x /... +// expected-error@-1 {{unary operator '...' 
cannot be applied to an operand of type 'Regex'}} +// expected-note@-2 {{overloads for '...' exist with these partially matching parameter lists}} + +do { + _ = true / false /; // expected-error {{expected expression after operator}} +} + +_ = "\(/x/)" + +func defaulted(x: Regex = /x/) {} + +func foo(_ x: T, y: T) {} +foo(/abc/, y: /abc /) + +func bar(_ x: inout T) {} + +// TODO: We split this into a prefix '&', but inout is handled specially when +// parsing an argument list. This shouldn't matter anyway, but we should at +// least have a custom diagnostic. +bar(&/x/) +// expected-error@-1 {{'&' is not a prefix unary operator}} + +struct S { + subscript(x: Regex) -> Void { () } +} + +func testSubscript(_ x: S) { + x[/x/] + x[/x /] +} + +func testReturn() -> Regex { + if .random() { + return /x/ + } + return /x / +} + +func testThrow() throws { + throw /x / // expected-error {{thrown expression type 'Regex' does not conform to 'Error'}} +} + +_ = [/abc/, /abc /] +_ = [/abc/:/abc/] // expected-error {{generic struct 'Dictionary' requires that 'Regex' conform to 'Hashable'}} +_ = [/abc/ : /abc/] // expected-error {{generic struct 'Dictionary' requires that 'Regex' conform to 'Hashable'}} +_ = [/abc /:/abc /] // expected-error {{generic struct 'Dictionary' requires that 'Regex' conform to 'Hashable'}} +_ = [/abc /: /abc /] // expected-error {{generic struct 'Dictionary' requires that 'Regex' conform to 'Hashable'}} +_ = (/abc/, /abc /) +_ = ((/abc /)) + +_ = { /abc/ } +_ = { + /abc/ +} + +let _: () -> Int = { + 0 + / 1 / + 2 +} + +_ = { + 0 // expected-warning {{integer literal is unused}} + /1 / // expected-warning {{regular expression literal is unused}} + 2 // expected-warning {{integer literal is unused}} +} + +// Operator chain, as a regex literal may not start with space. 
+_ = 2 +/ 1 / .bitWidth + +_ = 2 +/1/ .bitWidth // expected-error {{value of type 'Regex' has no member 'bitWidth'}} + +_ = 2 +/ 1 / + .bitWidth + +_ = 2 +/1 / + .bitWidth // expected-error {{value of type 'Regex' has no member 'bitWidth'}} + +let z = +/y/ + +// While '.' is technically an operator character, it seems more likely that +// the user hasn't written the member name yet. +_ = 0. / 1 / 2 // expected-error {{expected member name following '.'}} +_ = 0 . / 1 / 2 // expected-error {{expected member name following '.'}} + +switch "" { +case /x/: + // expected-error@-1 {{expression pattern of type 'Regex' cannot match values of type 'String'}} + // expected-note@-2 {{overloads for '~=' exist with these partially matching parameter lists: (Substring, String)}} + break +case _ where /x /: + // expected-error@-1 {{cannot convert value of type 'Regex' to expected condition type 'Bool'}} + break +default: + break +} + +do {} catch /x / {} +// expected-error@-1 {{expression pattern of type 'Regex' cannot match values of type 'any Error'}} +// expected-error@-2 {{binary operator '~=' cannot be applied to two 'any Error' operands}} +// expected-warning@-3 {{'catch' block is unreachable because no errors are thrown in 'do' block}} + +switch /x / { +default: + break +} + +if /x / {} // expected-error {{cannot convert value of type 'Regex' to expected condition type 'Bool'}} +if /x /.smth {} // expected-error {{value of type 'Regex' has no member 'smth'}} + +func testGuard() { + guard /x/ else { return } // expected-error {{cannot convert value of type 'Regex' to expected condition type 'Bool'}} +} + +for x in [0] where /x/ {} // expected-error {{cannot convert value of type 'Regex' to expected condition type 'Bool'}} + +typealias Magic = T +_ = /x/ as Magic +_ = /x/ as! 
String // expected-warning {{cast from 'Regex' to unrelated type 'String' always fails}} + +_ = type(of: /x /) + +do { + let /x / // expected-error {{expected pattern}} +} + +_ = try /x/; _ = try /x / +// expected-warning@-1 2{{no calls to throwing functions occur within 'try' expression}} + +// TODO: `try?` and `try!` are currently broken. +// _ = try? /x/; _ = try? / x / +// _ = try! /x/; _ = try! / x / + +_ = await /x / // expected-warning {{no 'async' operations occur within 'await' expression}} + +/x/ = 0 // expected-error {{cannot assign to value: literals are not mutable}} +/x/() // expected-error {{cannot call value of non-function type 'Regex'}} + +// TODO: We could allow this (and treat the last '/' as postfix), though it +// seems more likely the user has written a comment and is still in the middle +// of writing the characters before it. +/x// +// expected-error@-1 {{prefix slash not allowed}} +// expected-error@-2 {{'/' is not a prefix unary operator}} + +/x // +// expected-error@-1 {{prefix slash not allowed}} +// expected-error@-2 {{'/' is not a prefix unary operator}} + +/x/**/ +// expected-error@-1 {{prefix slash not allowed}} +// expected-error@-2 {{'/' is not a prefix unary operator}} + +// Make sure we continue parsing these as operators, as they are not right +// bound. +func foo(_ x: (Int, Int) -> Int, _ y: (Int, Int) -> Int) {} +foo(/, /) +foo(/,/) +foo((/), (/)) + +func bar(_ x: (Int, Int) -> Int, _ y: T) -> Int { 0 } +_ = bar(/, 1) / 2 +_ = bar(/, "(") / 2 +_ = bar(/, 1) // comment + +let arr: [Double] = [2, 3, 4] +_ = arr.reduce(1, /) / 3 +_ = arr.reduce(1, /) + arr.reduce(1, /) + +// Fine. +_ = /./ + +// You need to escape if you want a regex literal to start with these characters. 
+// TODO: Better recovery +_ = /\ / +do { _ = / / } +// expected-error@-1 2{{unary operator cannot be separated from its operand}} +// expected-error@-2 {{expected expression in assignment}} + +_ = /\)/ +do { _ = /)/ } +// expected-error@-1 {{expected expression after unary operator}} +// expected-error@-2 {{expected expression in assignment}} + +_ = /\,/ +do { _ = /,/ } +// expected-error@-1 {{expected expression after unary operator}} +// expected-error@-2 {{expected expression in assignment}} + +_ = /}/ +_ = /]/ +_ = /:/ +_ = /;/ + +// TODO: Need to delay diagnostics for these. +_ = /0xG/ // expected-error {{'G' is not a valid hexadecimal digit (0-9, A-F) in integer literal}} +_ = /0oG/ // expected-error {{'G' is not a valid octal digit (0-7) in integer literal}} +_ = /"/ // expected-error {{unterminated string literal}} +_ = /'/ // expected-error {{unterminated string literal}} +_ = /<#placeholder#>/ // expected-error {{editor placeholder in source file}}