From 6a8da3c609408a18dd1d1e49355ba564b50ec92c Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Thu, 12 May 2022 19:42:36 +0100 Subject: [PATCH 1/3] [Parse] Re-allow prefix operators containing `/` --- include/swift/AST/DiagnosticsParse.def | 3 --- lib/Parse/ParseDecl.cpp | 7 ------- test/StringProcessing/Frontend/enable-flag.swift | 2 +- test/StringProcessing/Parse/forward-slash-regex.swift | 6 +++--- 4 files changed, 4 insertions(+), 14 deletions(-) diff --git a/include/swift/AST/DiagnosticsParse.def b/include/swift/AST/DiagnosticsParse.def index b123a92839780..949ddc2c8d54d 100644 --- a/include/swift/AST/DiagnosticsParse.def +++ b/include/swift/AST/DiagnosticsParse.def @@ -94,9 +94,6 @@ ERROR(forbidden_extended_escaping_string,none, ERROR(regex_literal_parsing_error,none, "%0", (StringRef)) -ERROR(prefix_slash_not_allowed,none, - "prefix operator may not contain '/'", ()) - //------------------------------------------------------------------------------ // MARK: Lexer diagnostics //------------------------------------------------------------------------------ diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp index e8c43af987100..4a1a41a05100f 100644 --- a/lib/Parse/ParseDecl.cpp +++ b/lib/Parse/ParseDecl.cpp @@ -8528,13 +8528,6 @@ Parser::parseDeclOperator(ParseDeclOptions Flags, DeclAttributes &Attributes) { Tok.getRawText().front() == '!')) diagnose(Tok, diag::postfix_operator_name_cannot_start_with_unwrap); - // Prefix operators may not contain the `/` character when `/.../` regex - // literals are enabled. - if (Context.LangOpts.EnableBareSlashRegexLiterals) { - if (Attributes.hasAttribute() && Tok.getText().contains("/")) - diagnose(Tok, diag::prefix_slash_not_allowed); - } - // A common error is to try to define an operator with something in the // unicode plane considered to be an operator, or to try to define an // operator like "not". Analyze and diagnose this specifically. diff --git a/test/StringProcessing/Frontend/enable-flag.swift b/test/StringProcessing/Frontend/enable-flag.swift index bd43ba49e235f..601cdb71e1a59 100644 --- a/test/StringProcessing/Frontend/enable-flag.swift +++ b/test/StringProcessing/Frontend/enable-flag.swift @@ -4,7 +4,7 @@ // REQUIRES: swift_in_compiler -prefix operator / // expected-error {{prefix operator may not contain '/'}} +prefix operator / _ = /x/ _ = #/x/# diff --git a/test/StringProcessing/Parse/forward-slash-regex.swift b/test/StringProcessing/Parse/forward-slash-regex.swift index 7b6d5ceb9a6a2..155496f8d4299 100644 --- a/test/StringProcessing/Parse/forward-slash-regex.swift +++ b/test/StringProcessing/Parse/forward-slash-regex.swift @@ -2,9 +2,9 @@ // REQUIRES: swift_in_compiler // REQUIRES: concurrency -prefix operator / // expected-error {{prefix operator may not contain '/'}} -prefix operator ^/ // expected-error {{prefix operator may not contain '/'}} -prefix operator /^/ // expected-error {{prefix operator may not contain '/'}} +prefix operator / +prefix operator ^/ +prefix operator /^/ prefix operator !! prefix func !! (_ x: T) -> T { x } From 80bb83a4ab65a47eac46168b38f607bcf727c48c Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Thu, 12 May 2022 19:42:38 +0100 Subject: [PATCH 2/3] [Parse] Lenient prefix `/` parsing Treat a prefix operator containing `/` the same as the unapplied infix operator case, where we tentatively lex. This means that we bail if there is no closing `/` or the starting character is invalid. This leaves binary operator containing `/` in expression position as the last place where we know that we definitely have a regex literal. --- include/swift/Parse/Parser.h | 5 +- lib/Parse/ParseExpr.cpp | 106 ++++++++++-------- .../Parse/forward-slash-regex.swift | 9 +- .../Parse/regex_parse_error.swift | 12 +- 4 files changed, 71 insertions(+), 61 deletions(-) diff --git a/include/swift/Parse/Parser.h b/include/swift/Parse/Parser.h index 37ff7c2248425..36346155a67e5 100644 --- a/include/swift/Parse/Parser.h +++ b/include/swift/Parse/Parser.h @@ -1762,10 +1762,7 @@ class Parser { /// Try re-lex a '/' operator character as a regex literal. This should be /// called when parsing in an expression position to ensure a regex literal is /// correctly parsed. - /// - /// If \p mustBeRegex is set to true, a regex literal will always be lexed if - /// enabled. Otherwise, it will not be lexed if it may be ambiguous. - void tryLexRegexLiteral(bool mustBeRegex); + void tryLexRegexLiteral(bool forUnappliedOperator); void validateCollectionElement(ParserResult element); diff --git a/lib/Parse/ParseExpr.cpp b/lib/Parse/ParseExpr.cpp index d6bf3eb86201f..5ceed611d719b 100644 --- a/lib/Parse/ParseExpr.cpp +++ b/lib/Parse/ParseExpr.cpp @@ -513,7 +513,7 @@ ParserResult Parser::parseExprUnary(Diag<> Message, bool isExprBasic) { UnresolvedDeclRefExpr *Operator; // First check to see if we have the start of a regex literal `/.../`. - tryLexRegexLiteral(/*mustBeRegex*/ true); + tryLexRegexLiteral(/*forUnappliedOperator*/ false); switch (Tok.getKind()) { default: @@ -880,56 +880,64 @@ UnresolvedDeclRefExpr *Parser::parseExprOperator() { return new (Context) UnresolvedDeclRefExpr(name, refKind, DeclNameLoc(loc)); } -void Parser::tryLexRegexLiteral(bool mustBeRegex) { +void Parser::tryLexRegexLiteral(bool forUnappliedOperator) { if (!Context.LangOpts.EnableBareSlashRegexLiterals) return; // Check to see if we have a regex literal `/.../`, optionally with a prefix // operator e.g `!/.../`. + bool mustBeRegex = false; switch (Tok.getKind()) { case tok::oper_prefix: + // Prefix operators may contain `/` characters, so this may not be a regex, + // and as such need to make sure we have a closing `/`. + break; case tok::oper_binary_spaced: - case tok::oper_binary_unspaced: { - // Check to see if we have an operator containing '/'. - auto slashIdx = Tok.getText().find("/"); - if (slashIdx == StringRef::npos) - break; + case tok::oper_binary_unspaced: + // When re-lexing for a unary expression, binary operators are always + // invalid, so we can be confident in always lexing a regex literal. + mustBeRegex = !forUnappliedOperator; + break; + default: + // We only re-lex regex literals for operator tokens. + return; + } - CancellableBacktrackingScope backtrack(*this); - { - Optional regexScope; - regexScope.emplace(*L, mustBeRegex); - - // Try re-lex as a `/.../` regex literal, this will split an operator if - // necessary. - L->restoreState(getParserPosition().LS, /*enableDiagnostics*/ true); - - // If we didn't split a prefix operator, reset the regex lexing scope. - // Otherwise, we want to keep it in place for the next token. - auto didSplit = L->peekNextToken().getLength() == slashIdx; - if (!didSplit) - regexScope.reset(); - - // Discard the current token, which will be replaced by the re-lexed - // token, which will either be a regex literal token, a prefix operator, - // or the original unchanged token. - discardToken(); - - // If we split a prefix operator from the regex literal, and are not sure - // whether this should be a regex, backtrack if we didn't end up lexing a - // regex literal. - if (didSplit && !mustBeRegex && - !L->peekNextToken().is(tok::regex_literal)) { - return; - } + // Check to see if we have an operator containing '/'. + auto slashIdx = Tok.getText().find("/"); + if (slashIdx == StringRef::npos) + return; + + CancellableBacktrackingScope backtrack(*this); + { + Optional regexScope; + regexScope.emplace(*L, mustBeRegex); - // Otherwise, accept the result. - backtrack.cancelBacktrack(); + // Try re-lex as a `/.../` regex literal, this will split an operator if + // necessary. + L->restoreState(getParserPosition().LS, /*enableDiagnostics*/ true); + + // If we didn't split a prefix operator, reset the regex lexing scope. + // Otherwise, we want to keep it in place for the next token. + auto didSplit = L->peekNextToken().getLength() == slashIdx; + if (!didSplit) + regexScope.reset(); + + // Discard the current token, which will be replaced by the re-lexed + // token, which will either be a regex literal token, a prefix operator, + // or the original unchanged token. + discardToken(); + + // If we split a prefix operator from the regex literal, and are not sure + // whether this should be a regex, backtrack if we didn't end up lexing a + // regex literal. + if (didSplit && !mustBeRegex && + !L->peekNextToken().is(tok::regex_literal)) { + return; } - break; - } - default: - break; + + // Otherwise, accept the result. + backtrack.cancelBacktrack(); } } @@ -3220,17 +3228,23 @@ ParserStatus Parser::parseExprList(tok leftTok, tok rightTok, SourceLoc FieldNameLoc; parseOptionalArgumentLabel(FieldName, FieldNameLoc); - // First check to see if we have the start of a regex literal `/.../`. We - // need to do this before handling unapplied operator references, as e.g - // `(/, /)` might be a regex literal. - tryLexRegexLiteral(/*mustBeRegex*/ false); - // See if we have an operator decl ref '()'. The operator token in // this case lexes as a binary operator because it neither leads nor // follows a proper subexpression. + auto isUnappliedOperator = [&]() { + return Tok.isBinaryOperator() && peekToken().isAny(rightTok, tok::comma); + }; + + if (isUnappliedOperator()) { + // Check to see if we have the start of a regex literal `/.../`. We need + // to do this for an unapplied operator reference, as e.g `(/, /)` might + // be a regex literal. + tryLexRegexLiteral(/*forUnappliedOperator*/ true); + } + ParserStatus Status; Expr *SubExpr = nullptr; - if (Tok.isBinaryOperator() && peekToken().isAny(rightTok, tok::comma)) { + if (isUnappliedOperator()) { SyntaxParsingContext operatorContext(SyntaxContext, SyntaxKind::IdentifierExpr); DeclNameLoc Loc; diff --git a/test/StringProcessing/Parse/forward-slash-regex.swift b/test/StringProcessing/Parse/forward-slash-regex.swift index 155496f8d4299..49fcf64c4ac1f 100644 --- a/test/StringProcessing/Parse/forward-slash-regex.swift +++ b/test/StringProcessing/Parse/forward-slash-regex.swift @@ -53,8 +53,9 @@ do { // expected-error@-3 {{'/' is not a postfix unary operator}} } +// No closing '/' so a prefix operator. _ = /x -// expected-error@-1 {{unterminated regex literal}} +// expected-error@-1 {{'/' is not a prefix unary operator}} _ = !/x/ // expected-error@-1 {{cannot convert value of type 'Regex' to expected argument type 'Bool'}} @@ -252,13 +253,13 @@ _ = await /x / // expected-warning {{no 'async' operations occur within 'await' // written a comment and is still in the middle of writing the characters before // it. _ = /x// comment -// expected-error@-1 {{unterminated regex literal}} +// expected-error@-1 {{'/' is not a prefix unary operator}} _ = /x // comment -// expected-error@-1 {{unterminated regex literal}} +// expected-error@-1 {{'/' is not a prefix unary operator}} _ = /x/*comment*/ -// expected-error@-1 {{unterminated regex literal}} +// expected-error@-1 {{'/' is not a prefix unary operator}} // These become regex literals, unless surrounded in parens. func baz(_ x: (Int, Int) -> Int, _ y: (Int, Int) -> Int) {} // expected-note 4{{'baz' declared here}} diff --git a/test/StringProcessing/Parse/regex_parse_error.swift b/test/StringProcessing/Parse/regex_parse_error.swift index b53500d8aed52..4a96cab3e01e7 100644 --- a/test/StringProcessing/Parse/regex_parse_error.swift +++ b/test/StringProcessing/Parse/regex_parse_error.swift @@ -30,17 +30,15 @@ _ = #/\(?'abc/# do { _ = /\ / - // expected-error@-2 {{unterminated regex literal}} - // expected-error@-3 {{expected escape sequence}} -} // expected-error {{expected expression after operator}} + // expected-error@-1:3 {{expected expression path in Swift key path}} +} do { _ = #/\ /# - // expected-error@-2 {{unterminated regex literal}} - // expected-error@-3 {{expected escape sequence}} - // expected-error@-3 {{unterminated regex literal}} - // expected-warning@-4 {{regular expression literal is unused}} + // expected-error@-2:7 {{unterminated regex literal}} + // expected-error@-3:7 {{expected escape sequence}} + // expected-error@-3:4 {{expected expression}} } func foo(_ x: T, _ y: T) {} From 342e0658fc11f2de0b3dc2ff5af7d4cab110b85d Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Thu, 12 May 2022 19:42:38 +0100 Subject: [PATCH 3/3] [Parse] Expand unbalanced `)` regex literal heuristic Previously we would only check for a starting character of `)` when performing a tentative lex of a regex literal. Expand this to cover the entire range of the regex literal, ensuring to take escapes and custom character classes into account. --- lib/Parse/Lexer.cpp | 56 +++++++--- .../Parse/forward-slash-regex.swift | 103 +++++++++++++++--- .../StringProcessing/Parse/prefix-slash.swift | 23 ++++ 3 files changed, 152 insertions(+), 30 deletions(-) create mode 100644 test/StringProcessing/Parse/prefix-slash.swift diff --git a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp index da4ce6a302b36..8b8a92e39dc9a 100644 --- a/lib/Parse/Lexer.cpp +++ b/lib/Parse/Lexer.cpp @@ -2000,23 +2000,11 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) { // 2 // } // - // This takes advantage of the consistent operator spacing rule. We also - // need to ban ')' to avoid ambiguity with unapplied operator references e.g - // `reduce(1, /)`. This would be invalid regex syntax anyways. Note this - // doesn't totally save us from e.g `foo(/, 0)`, but it should at least - // help, and it ensures users can always surround their operator ref in - // parens `(/)` to fix the issue. + // This takes advantage of the consistent operator spacing rule. // TODO: This heuristic should be sunk into the Swift library once we have a // way of doing fix-its from there. auto *RegexContentStart = TokStart + 1; switch (*RegexContentStart) { - case ')': { - if (!MustBeRegex) - return false; - - // ')' is invalid anyway, so we can let the parser diagnose it. - break; - } case ' ': case '\t': { if (!MustBeRegex) @@ -2079,6 +2067,48 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) { Ptr--; } + // If we're tentatively lexing `/.../`, scan to make sure we don't have any + // unbalanced ')'s. This helps avoid ambiguity with unapplied operator + // references e.g `reduce(1, /)` and `foo(/, 0) / 2`. This would be invalid + // regex syntax anyways. This ensures users can surround their operator ref + // in parens `(/)` to fix the issue. This also applies to prefix operators + // that can be disambiguated as e.g `(/S.foo)`. Note we need to track whether + // or not we're in a custom character class `[...]`, as parens are literal + // there. + // TODO: This should be sunk into the Swift library. + if (IsForwardSlash && !MustBeRegex) { + unsigned CharClassDepth = 0; + unsigned GroupDepth = 0; + for (auto *Cursor = TokStart + 1; Cursor < Ptr - 1; Cursor++) { + switch (*Cursor) { + case '\\': + // Skip over the next character of an escape. + Cursor++; + break; + case '(': + if (CharClassDepth == 0) + GroupDepth += 1; + break; + case ')': + if (CharClassDepth != 0) + break; + + // Invalid, so bail. + if (GroupDepth == 0) + return false; + + GroupDepth -= 1; + break; + case '[': + CharClassDepth += 1; + break; + case ']': + if (CharClassDepth != 0) + CharClassDepth -= 1; + } + } + } + // Update to point to where we ended regex lexing. assert(Ptr > TokStart && Ptr <= BufferEnd); CurPtr = Ptr; diff --git a/test/StringProcessing/Parse/forward-slash-regex.swift b/test/StringProcessing/Parse/forward-slash-regex.swift index 49fcf64c4ac1f..b80504615e0e1 100644 --- a/test/StringProcessing/Parse/forward-slash-regex.swift +++ b/test/StringProcessing/Parse/forward-slash-regex.swift @@ -6,6 +6,8 @@ prefix operator / prefix operator ^/ prefix operator /^/ +prefix func ^/ (_ x: T) -> T { x } // expected-note {{'^/' declared here}} + prefix operator !! prefix func !! (_ x: T) -> T { x } @@ -261,6 +263,8 @@ _ = /x // comment _ = /x/*comment*/ // expected-error@-1 {{'/' is not a prefix unary operator}} +// MARK: Unapplied operators + // These become regex literals, unless surrounded in parens. func baz(_ x: (Int, Int) -> Int, _ y: (Int, Int) -> Int) {} // expected-note 4{{'baz' declared here}} baz(/, /) @@ -275,11 +279,7 @@ baz(/^, /) // expected-error@-1 {{cannot convert value of type 'Regex' to expected argument type '(Int, Int) -> Int'}} // expected-error@-2 {{missing argument for parameter #2 in call}} -do { - baz((/^), /) - // expected-error@-1 {{closing ')' does not balance any groups openings}} - // expected-note@-2 {{to match this opening '('}} -} // expected-error {{expected ')' in expression list}} +baz((/^), /) baz(^^/, /) // expected-error {{missing argument for parameter #2 in call}} baz((^^/), /) @@ -289,20 +289,18 @@ bazbaz(/, 0) bazbaz(^^/, 0) func qux(_ x: (Int, Int) -> Int, _ y: T) -> Int { 0 } -do { - _ = qux(/, 1) / 2 - // expected-error@-1 {{cannot parse regular expression: closing ')' does not balance any groups openings}} - // expected-error@-2 {{expected ',' separator}} -} +_ = qux(/, 1) / 2 do { _ = qux(/, "(") / 2 // expected-error@-1 {{cannot convert value of type 'Regex<(Substring, Substring)>' to expected argument type '(Int, Int) -> Int'}} // expected-error@-2 {{expected ',' separator}} } +_ = qux((/), "(") / 2 _ = qux(/, 1) // this comment tests to make sure we don't try and end the regex on the starting '/' of '//'. _ = qux(/, 1) /* same thing with a block comment */ -func quxqux(_ x: (Int, Int) -> Int) {} +@discardableResult +func quxqux(_ x: (Int, Int) -> Int) -> Int { 0 } quxqux(/^/) // expected-error {{cannot convert value of type 'Regex' to expected argument type '(Int, Int) -> Int'}} quxqux((/^/)) // expected-error {{cannot convert value of type 'Regex' to expected argument type '(Int, Int) -> Int'}} quxqux({ $0 /^/ $1 }) @@ -312,17 +310,88 @@ quxqux(!/^/) // expected-error@-2 {{cannot convert value of type 'Regex' to expected argument type 'Bool'}} quxqux(/^) - -do { - quxqux(/^) / 1 - // expected-error@-1 {{closing ')' does not balance any groups openings}} - // expected-error@-2 {{expected ',' separator}} -} +_ = quxqux(/^) / 1 let arr: [Double] = [2, 3, 4] _ = arr.reduce(1, /) / 3 _ = arr.reduce(1, /) + arr.reduce(1, /) +// MARK: ')' disambiguation behavior + +_ = (/x) +// expected-error@-1 {{'/' is not a prefix unary operator}} + +_ = (/x)/ +// expected-error@-1 {{'/' is not a prefix unary operator}} +// expected-error@-2 {{'/' is not a postfix unary operator}} + +_ = (/[(0)])/ +// expected-error@-1 {{'/' is not a prefix unary operator}} +// expected-error@-2 {{'/' is not a postfix unary operator}} + +_ = /[(0)]/ +_ = /(x)/ +_ = /[)]/ +_ = /[a\])]/ +_ = /([)])/ +_ = /]]][)]/ + +_ = / +// expected-error@-1 {{unterminated regex literal}} + +_ = /) +// expected-error@-1 {{unterminated regex literal}} +// expected-error@-2 {{closing ')' does not balance any groups openings}} + +let fn: (Int, Int) -> Int = (/) + +_ = /\()/ +// expected-error@-1 {{'/' is not a prefix unary operator}} +// expected-error@-2 {{'/' is not a postfix unary operator}} +// expected-error@-3 {{invalid component of Swift key path}} + +do { + let _: Regex = (/whatever\)/ + // expected-note@-1 {{to match this opening '('}} +} // expected-error {{expected ')' in expression list}} +do { + _ = /(()()))/ + // expected-error@-1 {{'/' is not a prefix unary operator}} + // expected-error@-2 {{consecutive statements on a line must be separated by ';'}} + // expected-error@-3 {{expected expression}} +} +do { + _ = /[x])/ + // expected-error@-1 {{'/' is not a prefix unary operator}} + // expected-error@-2 {{consecutive statements on a line must be separated by ';'}} + // expected-error@-3 {{expected expression}} +} +do { + _ = /[\]])/ + // expected-error@-1 {{expected expression path in Swift key path}} +} + +_ = ^/x/ +// expected-error@-1 {{'^' is not a prefix unary operator}} + +_ = (^/x)/ +// expected-error@-1 {{'/' is not a postfix unary operator}} + +_ = (!!/x/) + +_ = ^/"/" +// expected-error@-1 {{'^' is not a prefix unary operator}} +// expected-error@-2 {{unterminated string literal}} + +_ = ^/"[/" +// expected-error@-1 {{'^' is not a prefix unary operator}} +// expected-error@-2 {{unterminated string literal}} +// expected-error@-3 {{expected custom character class members}} + +_ = (^/)("/") + +// MARK: Starting characters + // Fine. _ = /./ diff --git a/test/StringProcessing/Parse/prefix-slash.swift b/test/StringProcessing/Parse/prefix-slash.swift new file mode 100644 index 0000000000000..461f65a621df4 --- /dev/null +++ b/test/StringProcessing/Parse/prefix-slash.swift @@ -0,0 +1,23 @@ +// RUN: %target-typecheck-verify-swift -enable-bare-slash-regex -disable-availability-checking +// REQUIRES: swift_in_compiler + +// Test the behavior of prefix '/' with regex literals enabled. + +prefix operator / +prefix func / (_ x: T) -> T { x } + +enum E { + case e + func foo(_ x: T) {} +} + +_ = /E.e +(/E.e).foo(/0) + +func foo(_ x: T, _ y: U) {} +foo(/E.e, /E.e) // expected-error {{expected ',' separator}} +foo((/E.e), /E.e) +foo((/)(E.e), /E.e) + +func bar(_ x: T) -> Int { 0 } +_ = bar(/E.e) / 2