diff --git a/include/swift/AST/DiagnosticEngine.h b/include/swift/AST/DiagnosticEngine.h index 0a797dadcb339..e0cefd3d6652d 100644 --- a/include/swift/AST/DiagnosticEngine.h +++ b/include/swift/AST/DiagnosticEngine.h @@ -848,6 +848,7 @@ namespace swift { friend class DiagnosticTransaction; friend class CompoundDiagnosticTransaction; friend class DiagnosticStateRAII; + friend class DiagnosticQueue; public: explicit DiagnosticEngine(SourceManager &SourceMgr) @@ -1137,10 +1138,16 @@ namespace swift { /// Send \c diag to all diagnostic consumers. void emitDiagnostic(const Diagnostic &diag); + /// Handle a new diagnostic, which will either be emitted, or added to an + /// active transaction. + void handleDiagnostic(Diagnostic diag); + /// Send all tentative diagnostics to all diagnostic consumers and /// delete them. void emitTentativeDiagnostics(); + void forwardTentativeDiagnosticsTo(DiagnosticEngine &targetEngine); + public: DiagnosticKind declaredDiagnosticKindFor(const DiagID id); @@ -1333,6 +1340,50 @@ namespace swift { } }; + class DiagnosticQueue final { + /// The underlying diagnostic engine that the diagnostics will be emitted + /// by. + DiagnosticEngine &UnderlyingEngine; + + /// A temporary engine used to queue diagnostics. + DiagnosticEngine QueueEngine; + + public: + DiagnosticQueue(const DiagnosticQueue &) = delete; + DiagnosticQueue &operator=(const DiagnosticQueue &) = delete; + + explicit DiagnosticQueue(DiagnosticEngine &engine) + : UnderlyingEngine(engine), QueueEngine(engine.SourceMgr) { + // Open a transaction to avoid emitting any diagnostics. + QueueEngine.TransactionCount++; + } + + /// Retrieve the engine which may be used to enqueue diagnostics. + DiagnosticEngine &getDiags() { return QueueEngine; } + + /// Retrieve the underlying engine which will receive the diagnostics. + DiagnosticEngine &getUnderlyingDiags() { return UnderlyingEngine; } + + /// Clear this queue and erase all diagnostics recorded. + void clear() { + assert(QueueEngine.TransactionCount == 1 && + "Haven't closed outstanding DiagnosticTransactions"); + QueueEngine.TentativeDiagnostics.clear(); + } + + /// Emit all the diagnostics recorded by this queue. + void emit() { + assert(QueueEngine.TransactionCount == 1 && + "Haven't closed outstanding DiagnosticTransactions"); + QueueEngine.forwardTentativeDiagnosticsTo(UnderlyingEngine); + } + + ~DiagnosticQueue() { + emit(); + QueueEngine.TransactionCount--; + } + }; + inline void DiagnosticEngine::diagnoseWithNotes(InFlightDiagnostic parentDiag, llvm::function_ref builder) { diff --git a/include/swift/AST/DiagnosticsParse.def b/include/swift/AST/DiagnosticsParse.def index 661e21f6c5ab9..9e9a99515ce22 100644 --- a/include/swift/AST/DiagnosticsParse.def +++ b/include/swift/AST/DiagnosticsParse.def @@ -94,6 +94,9 @@ ERROR(forbidden_extended_escaping_string,none, ERROR(regex_literal_parsing_error,none, "%0", (StringRef)) +ERROR(prefix_slash_not_allowed,none, + "prefix slash not allowed", ()) + //------------------------------------------------------------------------------ // MARK: Lexer diagnostics //------------------------------------------------------------------------------ diff --git a/include/swift/Parse/Lexer.h b/include/swift/Parse/Lexer.h index ceda74e5a630d..b19e54ffc3d7f 100644 --- a/include/swift/Parse/Lexer.h +++ b/include/swift/Parse/Lexer.h @@ -75,7 +75,10 @@ class Lexer { const LangOptions &LangOpts; const SourceManager &SourceMgr; const unsigned BufferID; - DiagnosticEngine *Diags; + + /// A queue of diagnostics to emit when a token is consumed. We want to queue + /// them, as the parser may backtrack and re-lex a token. + Optional DiagQueue; using State = LexerState; @@ -154,6 +157,19 @@ class Lexer { void initialize(unsigned Offset, unsigned EndOffset); + /// Retrieve the diagnostic engine for emitting diagnostics for the current + /// token. + DiagnosticEngine *getTokenDiags() { + return DiagQueue ? &DiagQueue->getDiags() : nullptr; + } + + /// Retrieve the underlying diagnostic engine we emit diagnostics to. Note + /// this should only be used for diagnostics not concerned with the current + /// token. + DiagnosticEngine *getUnderlyingDiags() { + return DiagQueue ? &DiagQueue->getUnderlyingDiags() : nullptr; + } + public: /// Create a normal lexer that scans the whole source buffer. /// @@ -209,6 +225,10 @@ class Lexer { LeadingTriviaResult = LeadingTrivia; TrailingTriviaResult = TrailingTrivia; } + // Emit any diagnostics recorded for this token. + if (DiagQueue) + DiagQueue->emit(); + if (Result.isNot(tok::eof)) lexImpl(); } @@ -298,12 +318,12 @@ class Lexer { void restoreState(State S, bool enableDiagnostics = false) { assert(S.isValid()); CurPtr = getBufferPtrForSourceLoc(S.Loc); - // Don't reemit diagnostics while readvancing the lexer. - llvm::SaveAndRestore - D(Diags, enableDiagnostics ? Diags : nullptr); - lexImpl(); + // Don't re-emit diagnostics from readvancing the lexer. + if (DiagQueue && !enableDiagnostics) + DiagQueue->clear(); + // Restore Trivia. if (TriviaRetention == TriviaRetentionMode::WithTrivia) LeadingTrivia = S.LeadingTrivia; @@ -505,7 +525,7 @@ class Lexer { void getStringLiteralSegments(const Token &Str, SmallVectorImpl &Segments) { - return getStringLiteralSegments(Str, Segments, Diags); + return getStringLiteralSegments(Str, Segments, getTokenDiags()); } static SourceLoc getSourceLoc(const char *Loc) { @@ -531,6 +551,9 @@ class Lexer { void operator=(const SILBodyRAII&) = delete; }; + /// Attempt to re-lex a regex literal with forward slashes `/.../`. + bool tryLexAsForwardSlashRegexLiteral(State S); + private: /// Nul character meaning kind. enum class NulCharacterKind { diff --git a/include/swift/Parse/Parser.h b/include/swift/Parse/Parser.h index 78357e240be38..f22a8b6926bb9 100644 --- a/include/swift/Parse/Parser.h +++ b/include/swift/Parse/Parser.h @@ -559,6 +559,11 @@ class Parser { return f(backtrackScope); } + /// Discard the current token. This will avoid interface hashing or updating + /// the previous loc. Only should be used if you've completely re-lexed + /// a different token at that position. + SourceLoc discardToken(); + /// Consume a token that we created on the fly to correct the original token /// stream from lexer. void consumeExtraToken(Token K); @@ -1745,8 +1750,11 @@ class Parser { ParserResult parseExprPoundCodeCompletion(Optional ParentKind); + UnresolvedDeclRefExpr *makeExprOperator(Token opToken); UnresolvedDeclRefExpr *parseExprOperator(); + void tryLexRegexLiteral(); + void validateCollectionElement(ParserResult element); //===--------------------------------------------------------------------===// diff --git a/lib/AST/DiagnosticEngine.cpp b/lib/AST/DiagnosticEngine.cpp index 8eb534888a140..983cb5403e22c 100644 --- a/lib/AST/DiagnosticEngine.cpp +++ b/lib/AST/DiagnosticEngine.cpp @@ -1040,15 +1040,19 @@ DiagnosticBehavior DiagnosticState::determineBehavior(const Diagnostic &diag) { void DiagnosticEngine::flushActiveDiagnostic() { assert(ActiveDiagnostic && "No active diagnostic to flush"); + handleDiagnostic(std::move(*ActiveDiagnostic)); + ActiveDiagnostic.reset(); +} + +void DiagnosticEngine::handleDiagnostic(Diagnostic diag) { if (TransactionCount == 0) { - emitDiagnostic(*ActiveDiagnostic); + emitDiagnostic(diag); WrappedDiagnostics.clear(); WrappedDiagnosticArgs.clear(); } else { - onTentativeDiagnosticFlush(*ActiveDiagnostic); - TentativeDiagnostics.emplace_back(std::move(*ActiveDiagnostic)); + onTentativeDiagnosticFlush(diag); + TentativeDiagnostics.emplace_back(std::move(diag)); } - ActiveDiagnostic.reset(); } void DiagnosticEngine::emitTentativeDiagnostics() { @@ -1060,6 +1064,16 @@ void DiagnosticEngine::emitTentativeDiagnostics() { WrappedDiagnosticArgs.clear(); } +void DiagnosticEngine::forwardTentativeDiagnosticsTo( + DiagnosticEngine &targetEngine) { + for (auto &diag : TentativeDiagnostics) { + targetEngine.handleDiagnostic(diag); + } + TentativeDiagnostics.clear(); + WrappedDiagnostics.clear(); + WrappedDiagnosticArgs.clear(); +} + /// Returns the access level of the least accessible PrettyPrintedDeclarations /// buffer that \p decl should appear in. /// diff --git a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp index 96a3ef892168f..4e5e27b536130 100644 --- a/lib/Parse/Lexer.cpp +++ b/lib/Parse/Lexer.cpp @@ -183,9 +183,12 @@ Lexer::Lexer(const PrincipalTag &, const LangOptions &LangOpts, HashbangMode HashbangAllowed, CommentRetentionMode RetainComments, TriviaRetentionMode TriviaRetention) : LangOpts(LangOpts), SourceMgr(SourceMgr), BufferID(BufferID), - Diags(Diags), LexMode(LexMode), + LexMode(LexMode), IsHashbangAllowed(HashbangAllowed == HashbangMode::Allowed), - RetainComments(RetainComments), TriviaRetention(TriviaRetention) {} + RetainComments(RetainComments), TriviaRetention(TriviaRetention) { + if (Diags) + DiagQueue.emplace(*Diags); +} void Lexer::initialize(unsigned Offset, unsigned EndOffset) { assert(Offset <= EndOffset); @@ -245,7 +248,7 @@ Lexer::Lexer(const LangOptions &Options, const SourceManager &SourceMgr, Lexer::Lexer(Lexer &Parent, State BeginState, State EndState) : Lexer(PrincipalTag(), Parent.LangOpts, Parent.SourceMgr, Parent.BufferID, - Parent.Diags, Parent.LexMode, + Parent.getUnderlyingDiags(), Parent.LexMode, Parent.IsHashbangAllowed ? HashbangMode::Allowed : HashbangMode::Disallowed, @@ -261,7 +264,7 @@ Lexer::Lexer(Lexer &Parent, State BeginState, State EndState) } InFlightDiagnostic Lexer::diagnose(const char *Loc, Diagnostic Diag) { - if (Diags) + if (auto *Diags = getTokenDiags()) return Diags->diagnose(getSourceLoc(Loc), Diag); return InFlightDiagnostic(); @@ -272,7 +275,7 @@ Token Lexer::getTokenAt(SourceLoc Loc) { SourceMgr.findBufferContainingLoc(Loc)) && "location from the wrong buffer"); - Lexer L(LangOpts, SourceMgr, BufferID, Diags, LexMode, + Lexer L(LangOpts, SourceMgr, BufferID, getUnderlyingDiags(), LexMode, HashbangMode::Allowed, CommentRetentionMode::None, TriviaRetentionMode::WithoutTrivia); L.restoreState(State(Loc)); @@ -330,6 +333,7 @@ void Lexer::formStringLiteralToken(const char *TokStart, return; NextToken.setStringLiteral(IsMultilineString, CustomDelimiterLen); + auto *Diags = getTokenDiags(); if (IsMultilineString && Diags) validateMultilineIndents(NextToken, Diags); } @@ -416,7 +420,8 @@ static bool advanceToEndOfLine(const char *&CurPtr, const char *BufferEnd, } void Lexer::skipToEndOfLine(bool EatNewline) { - bool isEOL = advanceToEndOfLine(CurPtr, BufferEnd, CodeCompletionPtr, Diags); + bool isEOL = + advanceToEndOfLine(CurPtr, BufferEnd, CodeCompletionPtr, getTokenDiags()); if (EatNewline && isEOL) { ++CurPtr; NextToken.setAtStartOfLine(true); @@ -514,8 +519,8 @@ static bool skipToEndOfSlashStarComment(const char *&CurPtr, /// skipSlashStarComment - /**/ comments are skipped (treated as whitespace). /// Note that (unlike in C) block comments can be nested. void Lexer::skipSlashStarComment() { - bool isMultiline = - skipToEndOfSlashStarComment(CurPtr, BufferEnd, CodeCompletionPtr, Diags); + bool isMultiline = skipToEndOfSlashStarComment( + CurPtr, BufferEnd, CodeCompletionPtr, getTokenDiags()); if (isMultiline) NextToken.setAtStartOfLine(true); } @@ -1360,7 +1365,7 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote, if (!IsMultilineString && !CustomDelimiterLen) return ~0U; - DiagnosticEngine *D = EmitDiagnostics ? Diags : nullptr; + DiagnosticEngine *D = EmitDiagnostics ? getTokenDiags() : nullptr; auto TmpPtr = CurPtr; if (IsMultilineString && !advanceIfMultilineDelimiter(CustomDelimiterLen, TmpPtr, D)) @@ -1385,7 +1390,7 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote, return CurPtr[-1]; case '\\': // Escapes. if (!delimiterMatches(CustomDelimiterLen, CurPtr, - EmitDiagnostics ? Diags : nullptr)) + EmitDiagnostics ? getTokenDiags() : nullptr)) return '\\'; break; } @@ -1799,7 +1804,7 @@ static void validateMultilineIndents(const Token &Str, void Lexer::diagnoseSingleQuoteStringLiteral(const char *TokStart, const char *TokEnd) { assert(*TokStart == '\'' && TokEnd[-1] == '\''); - if (!Diags) // or assert? + if (!getTokenDiags()) // or assert? return; auto startLoc = Lexer::getSourceLoc(TokStart); @@ -1836,7 +1841,8 @@ void Lexer::diagnoseSingleQuoteStringLiteral(const char *TokStart, replacement.append(OutputPtr, Ptr - 1); replacement.push_back('"'); - Diags->diagnose(startLoc, diag::lex_single_quote_string) + getTokenDiags() + ->diagnose(startLoc, diag::lex_single_quote_string) .fixItReplaceChars(startLoc, endLoc, replacement); } @@ -1852,8 +1858,8 @@ void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) { // diagnostics about changing them to double quotes. assert((QuoteChar == '"' || QuoteChar == '\'') && "Unexpected start"); - bool IsMultilineString = advanceIfMultilineDelimiter(CustomDelimiterLen, - CurPtr, Diags, true); + bool IsMultilineString = advanceIfMultilineDelimiter( + CustomDelimiterLen, CurPtr, getTokenDiags(), true); if (IsMultilineString && *CurPtr != '\n' && *CurPtr != '\r') diagnose(CurPtr, diag::lex_illegal_multiline_string_start) .fixItInsert(Lexer::getSourceLoc(CurPtr), "\n"); @@ -1997,6 +2003,63 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) { return true; } +bool Lexer::tryLexAsForwardSlashRegexLiteral(State S) { + auto priorState = getStateForBeginningOfToken(NextToken, LeadingTrivia); + + // Re-lex from the given state. + restoreState(S); + + // While we restored state, that would have re-advanced the lexer. This is + // good in that it's filled in all the interesting properties of the token + // (trivia, is on new line, etc), but we need to rewind to re-lex the actual + // kind. + auto *TokStart = getBufferPtrForSourceLoc(NextToken.getLoc()); + assert(*TokStart == '/'); + CurPtr = TokStart + 1; + + auto bail = [&]() -> bool { + restoreState(priorState); + return false; + }; + + // We need to ban these characters at the start of a regex to avoid ambiguity + // with unapplied operator references, e.g 'arr.reduce(0, /) / 5'. This + // doesn't totally save us from e.g 'foo(/, /)', but it should help. + switch (*CurPtr) { + case ' ': case '\t': case ')': + return bail(); + default: + break; + } + + while (true) { + uint32_t CharValue = validateUTF8CharacterAndAdvance(CurPtr, BufferEnd); + if (CharValue == ~0U) + return bail(); + + // Regex literals cannot span multiple lines. + if (CharValue == '\n' || CharValue == '\r') + return bail(); + + if (CharValue == '\\' && *CurPtr == '/') { + // Skip escaped delimiter and advance. + CurPtr++; + } else if (CharValue == '/') { + // End of literal, stop. + break; + } + } + // We've ended on the opening of a comment, bail. + // TODO: We could treat such cases as postfix operators on a regex literal, + // but it seems more likely the user has written a comment and is in the + // middle of editing the text before it. + if (*CurPtr == '*' || *CurPtr == '/') + return bail(); + + formToken(tok::regex_literal, TokStart); + return true; +} + /// lexEscapedIdentifier: /// identifier ::= '`' identifier '`' /// @@ -2380,6 +2443,11 @@ void Lexer::lexImpl() { assert(CurPtr >= BufferStart && CurPtr <= BufferEnd && "Current pointer out of range!"); + // If we're re-lexing, clear out any previous diagnostics that weren't + // emitted. + if (DiagQueue) + DiagQueue->clear(); + const char *LeadingTriviaStart = CurPtr; if (CurPtr == BufferStart) { if (BufferStart < ContentStart) { @@ -2467,8 +2535,9 @@ void Lexer::lexImpl() { case ':': return formToken(tok::colon, TokStart); case '\\': return formToken(tok::backslash, TokStart); - case '#': + case '#': { // Try lex a raw string literal. + auto *Diags = getTokenDiags(); if (unsigned CustomDelimiterLen = advanceIfCustomDelimiter(CurPtr, Diags)) return lexStringLiteral(CustomDelimiterLen); @@ -2479,8 +2548,8 @@ void Lexer::lexImpl() { // Otherwise try lex a magic pound literal. return lexHash(); - - // Operator characters. + } + // Operator characters. case '/': if (CurPtr[0] == '/') { // "//" skipSlashSlashComment(/*EatNewline=*/true); @@ -2656,7 +2725,7 @@ StringRef Lexer::lexTrivia(bool IsForTrailingTrivia, case 0: switch (getNulCharacterKind(CurPtr - 1)) { case NulCharacterKind::Embedded: { - diagnoseEmbeddedNul(Diags, CurPtr - 1); + diagnoseEmbeddedNul(getTokenDiags(), CurPtr - 1); goto Restart; } case NulCharacterKind::CodeCompletion: diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp index 2bf7d0b1a1c99..ce257093784eb 100644 --- a/lib/Parse/ParseDecl.cpp +++ b/lib/Parse/ParseDecl.cpp @@ -8603,6 +8603,11 @@ Parser::parseDeclOperator(ParseDeclOptions Flags, DeclAttributes &Attributes) { Tok.getRawText().front() == '!')) diagnose(Tok, diag::postfix_operator_name_cannot_start_with_unwrap); + if (Attributes.hasAttribute()) { + if (Tok.getText().contains("/")) + diagnose(Tok, diag::prefix_slash_not_allowed); + } + // A common error is to try to define an operator with something in the // unicode plane considered to be an operator, or to try to define an // operator like "not". Analyze and diagnose this specifically. diff --git a/lib/Parse/ParseExpr.cpp b/lib/Parse/ParseExpr.cpp index b48dc05549944..ce941197d1ab1 100644 --- a/lib/Parse/ParseExpr.cpp +++ b/lib/Parse/ParseExpr.cpp @@ -511,6 +511,10 @@ ParserResult Parser::parseExprSequenceElement(Diag<> message, ParserResult Parser::parseExprUnary(Diag<> Message, bool isExprBasic) { SyntaxParsingContext UnaryContext(SyntaxContext, SyntaxContextKind::Expr); UnresolvedDeclRefExpr *Operator; + + // First check to see if we have the start of a regex literal /.../. + tryLexRegexLiteral(); + switch (Tok.getKind()) { default: // If the next token is not an operator, just parse this as expr-postfix. @@ -532,16 +536,31 @@ ParserResult Parser::parseExprUnary(Diag<> Message, bool isExprBasic) { case tok::backslash: return parseExprKeyPath(); - case tok::oper_postfix: + case tok::oper_postfix: { // Postfix operators cannot start a subexpression, but can happen // syntactically because the operator may just follow whatever precedes this // expression (and that may not always be an expression). diagnose(Tok, diag::invalid_postfix_operator); Tok.setKind(tok::oper_prefix); - LLVM_FALLTHROUGH; - case tok::oper_prefix: Operator = parseExprOperator(); break; + } + case tok::oper_prefix: { + // Check to see if we can split a prefix operator containing '/', e.g '!/', + // which might be a prefix operator on a regex literal. + auto slashIdx = Tok.getText().find("/"); + if (slashIdx != StringRef::npos) { + auto prefix = Tok.getText().take_front(slashIdx); + if (!prefix.empty()) { + Operator = makeExprOperator({Tok.getKind(), prefix}); + consumeStartingCharacterOfCurrentToken(Tok.getKind(), prefix.size()); + break; + } + diagnose(Tok, diag::prefix_slash_not_allowed); + } + Operator = parseExprOperator(); + break; + } case tok::oper_binary_spaced: case tok::oper_binary_unspaced: { // For recovery purposes, accept an oper_binary here. @@ -860,19 +879,50 @@ static DeclRefKind getDeclRefKindForOperator(tok kind) { } } -/// parseExprOperator - Parse an operator reference expression. These -/// are not "proper" expressions; they can only appear in binary/unary -/// operators. -UnresolvedDeclRefExpr *Parser::parseExprOperator() { +UnresolvedDeclRefExpr *Parser::makeExprOperator(Token Tok) { assert(Tok.isAnyOperator()); DeclRefKind refKind = getDeclRefKindForOperator(Tok.getKind()); SourceLoc loc = Tok.getLoc(); DeclNameRef name(Context.getIdentifier(Tok.getText())); - consumeToken(); // Bypass local lookup. return new (Context) UnresolvedDeclRefExpr(name, refKind, DeclNameLoc(loc)); } +/// parseExprOperator - Parse an operator reference expression. These +/// are not "proper" expressions; they can only appear in binary/unary +/// operators. +UnresolvedDeclRefExpr *Parser::parseExprOperator() { + auto *op = makeExprOperator(Tok); + consumeToken(); + return op; +} + +void Parser::tryLexRegexLiteral() { + // Check to see if we have the start of a regex literal /.../. + switch (Tok.getKind()) { + case tok::oper_prefix: + case tok::oper_binary_spaced: + case tok::oper_binary_unspaced: { + if (!Tok.getText().startswith("/")) + break; + + // Try re-lex as a /.../ regex literal. + if (!L->tryLexAsForwardSlashRegexLiteral(getParserPosition().LS)) + break; + + // Discard the operator token, which will be replaced by the regex literal + // token. + discardToken(); + + assert(Tok.getText().startswith("/")); + assert(Tok.is(tok::regex_literal)); + break; + } + default: + break; + } +} + /// parseExprSuper /// /// expr-super: @@ -3147,6 +3197,10 @@ ParserStatus Parser::parseExprList(tok leftTok, tok rightTok, SourceLoc FieldNameLoc; parseOptionalArgumentLabel(FieldName, FieldNameLoc); + // First check to see if we have the start of a regex literal /.../. We + // need to do this before handling unapplied operator references. + tryLexRegexLiteral(); + // See if we have an operator decl ref '()'. The operator token in // this case lexes as a binary operator because it neither leads nor // follows a proper subexpression. diff --git a/lib/Parse/ParseRegex.cpp b/lib/Parse/ParseRegex.cpp index 802d0c25c70eb..2e22ded577fbf 100644 --- a/lib/Parse/ParseRegex.cpp +++ b/lib/Parse/ParseRegex.cpp @@ -32,13 +32,31 @@ using namespace swift::syntax; ParserResult Parser::parseExprRegexLiteral() { assert(Tok.is(tok::regex_literal)); - assert(regexLiteralParsingFn); + + // Bail if '-enable-experimental-string-processing' is not enabled. + if (!Context.LangOpts.EnableExperimentalStringProcessing || + !regexLiteralParsingFn) { + diagnose(Tok, diag::regex_literal_parsing_error, + "regex literal requires '-enable-experimental-string-processing'"); + auto loc = consumeToken(); + return makeParserResult(new (Context) ErrorExpr(loc)); + } SyntaxParsingContext LocalContext(SyntaxContext, SyntaxKind::RegexLiteralExpr); auto regexText = Tok.getText(); + // The Swift library doesn't know about `/.../` regexes, let's pretend it's + // `#/.../#` instead. + if (regexText[0] == '/') { + SmallString<32> scratch; + scratch.append("#"); + scratch.append(regexText); + scratch.append("#"); + regexText = Context.AllocateCopy(StringRef(scratch)); + } + // Let the Swift library parse the contents, returning an error, or null if // successful. // TODO: We need to be able to pass back a source location to emit the error diff --git a/lib/Parse/Parser.cpp b/lib/Parse/Parser.cpp index 4ef7ea8dea1f6..329f7771a3a6d 100644 --- a/lib/Parse/Parser.cpp +++ b/lib/Parse/Parser.cpp @@ -579,13 +579,16 @@ const Token &Parser::peekToken() { return L->peekNextToken(); } -SourceLoc Parser::consumeTokenWithoutFeedingReceiver() { - SourceLoc Loc = Tok.getLoc(); +SourceLoc Parser::discardToken() { assert(Tok.isNot(tok::eof) && "Lexing past eof!"); + SourceLoc Loc = Tok.getLoc(); + L->lex(Tok, LeadingTrivia, TrailingTrivia); + return Loc; +} +SourceLoc Parser::consumeTokenWithoutFeedingReceiver() { recordTokenHash(Tok); - - L->lex(Tok, LeadingTrivia, TrailingTrivia); + auto Loc = discardToken(); PreviousLoc = Loc; return Loc; } diff --git a/test/StringProcessing/Parse/forward-slash-regex-default.swift b/test/StringProcessing/Parse/forward-slash-regex-default.swift new file mode 100644 index 0000000000000..3fccb6802cdde --- /dev/null +++ b/test/StringProcessing/Parse/forward-slash-regex-default.swift @@ -0,0 +1,13 @@ +// RUN: %target-typecheck-verify-swift + +let _ = /x/ // expected-error {{regex literal requires '-enable-experimental-string-processing'}} + +prefix operator / // expected-error {{prefix slash not allowed}} +prefix operator ^/ // expected-error {{prefix slash not allowed}} + +_ = /x +// expected-error@-1 {{prefix slash not allowed}} +// expected-error@-2 {{'/' is not a prefix unary operator}} +// expected-error@-3 {{cannot find 'x' in scope}} + +_ = !/x/ // expected-error {{regex literal requires '-enable-experimental-string-processing'}} diff --git a/test/StringProcessing/Parse/forward-slash-regex.swift b/test/StringProcessing/Parse/forward-slash-regex.swift new file mode 100644 index 0000000000000..b8464634bfed1 --- /dev/null +++ b/test/StringProcessing/Parse/forward-slash-regex.swift @@ -0,0 +1,290 @@ +// RUN: %target-typecheck-verify-swift -enable-experimental-string-processing +// REQUIRES: swift_in_compiler +// REQUIRES: concurrency + +precedencegroup P { + associativity: left +} + +// Fine. +infix operator /^/ : P +func /^/ (lhs: Int, rhs: Int) -> Int { 0 } + +let i = 0 /^/ 1/^/3 + +prefix operator / // expected-error {{prefix slash not allowed}} +prefix operator ^/ // expected-error {{prefix slash not allowed}} + +let x = /abc/ +_ = /abc/ +_ = /x/.self +_ = /\// + +// These unfortunately become infix `=/`. We could likely improve the diagnostic +// though. +let z=/0/ +// expected-error@-1 {{type annotation missing in pattern}} +// expected-error@-2 {{consecutive statements on a line must be separated by ';'}} +// expected-error@-3 {{expected expression after unary operator}} +// expected-error@-4 {{cannot find operator '=/' in scope}} +// expected-error@-5 {{'/' is not a postfix unary operator}} +_=/0/ +// expected-error@-1 {{'_' can only appear in a pattern or on the left side of an assignment}} +// expected-error@-2 {{cannot find operator '=/' in scope}} +// expected-error@-3 {{'/' is not a postfix unary operator}} + +_ = /x +// expected-error@-1 {{prefix slash not allowed}} +// expected-error@-2 {{'/' is not a prefix unary operator}} + +_ = !/x/ +// expected-error@-1 {{cannot convert value of type 'Regex' to expected argument type 'Bool'}} + +_ = /x/! // expected-error {{cannot force unwrap value of non-optional type 'Regex'}} +_ = /x/ + /y/ // expected-error {{binary operator '+' cannot be applied to two 'Regex' operands}} + +_ = /x/+/y/ +// expected-error@-1 {{cannot find operator '+/' in scope}} +// expected-error@-2 {{'/' is not a postfix unary operator}} +// expected-error@-3 {{cannot find 'y' in scope}} + +_ = /x/?.blah +// expected-error@-1 {{cannot use optional chaining on non-optional value of type 'Regex'}} +// expected-error@-2 {{value of type 'Regex' has no member 'blah'}} +_ = /x/!.blah +// expected-error@-1 {{cannot force unwrap value of non-optional type 'Regex'}} +// expected-error@-2 {{value of type 'Regex' has no member 'blah'}} + +_ = /x /? // expected-error {{cannot use optional chaining on non-optional value of type 'Regex'}} + .blah // expected-error {{value of type 'Regex' has no member 'blah'}} + +_ = 0; /x / // expected-warning {{regular expression literal is unused}} + +_ = /x / ? 0 : 1 // expected-error {{cannot convert value of type 'Regex' to expected condition type 'Bool'}} +_ = .random() ? /x / : .blah // expected-error {{type 'Regex' has no member 'blah'}} + +_ = /x/ ?? /x/ // expected-warning {{left side of nil coalescing operator '??' has non-optional type 'Regex', so the right side is never used}} +_ = /x / ?? /x / // expected-warning {{left side of nil coalescing operator '??' has non-optional type 'Regex', so the right side is never used}} + +_ = /x/??/x/ // expected-error {{'/' is not a postfix unary operator}} + +_ = /x/ ... /y/ // expected-error {{referencing operator function '...' on 'Comparable' requires that 'Regex' conform to 'Comparable'}} + +_ = /x/.../y/ +// expected-error@-1 {{missing whitespace between '...' and '/' operators}} +// expected-error@-2 {{'/' is not a postfix unary operator}} +// expected-error@-3 {{cannot find 'y' in scope}} + +_ = /x /... +// expected-error@-1 {{unary operator '...' cannot be applied to an operand of type 'Regex'}} +// expected-note@-2 {{overloads for '...' exist with these partially matching parameter lists}} + +do { + _ = true / false /; // expected-error {{expected expression after operator}} +} + +_ = "\(/x/)" + +func defaulted(x: Regex = /x/) {} + +func foo(_ x: T, y: T) {} +foo(/abc/, y: /abc /) + +func bar(_ x: inout T) {} + +// TODO: We split this into a prefix '&', but inout is handled specially when +// parsing an argument list. This shouldn't matter anyway, but we should at +// least have a custom diagnostic. +bar(&/x/) +// expected-error@-1 {{'&' is not a prefix unary operator}} + +struct S { + subscript(x: Regex) -> Void { () } +} + +func testSubscript(_ x: S) { + x[/x/] + x[/x /] +} + +func testReturn() -> Regex { + if .random() { + return /x/ + } + return /x / +} + +func testThrow() throws { + throw /x / // expected-error {{thrown expression type 'Regex' does not conform to 'Error'}} +} + +_ = [/abc/, /abc /] +_ = [/abc/:/abc/] // expected-error {{generic struct 'Dictionary' requires that 'Regex' conform to 'Hashable'}} +_ = [/abc/ : /abc/] // expected-error {{generic struct 'Dictionary' requires that 'Regex' conform to 'Hashable'}} +_ = [/abc /:/abc /] // expected-error {{generic struct 'Dictionary' requires that 'Regex' conform to 'Hashable'}} +_ = [/abc /: /abc /] // expected-error {{generic struct 'Dictionary' requires that 'Regex' conform to 'Hashable'}} +_ = (/abc/, /abc /) +_ = ((/abc /)) + +_ = { /abc/ } +_ = { + /abc/ +} + +let _: () -> Int = { + 0 + / 1 / + 2 +} + +_ = { + 0 // expected-warning {{integer literal is unused}} + /1 / // expected-warning {{regular expression literal is unused}} + 2 // expected-warning {{integer literal is unused}} +} + +// Operator chain, as a regex literal may not start with space. +_ = 2 +/ 1 / .bitWidth + +_ = 2 +/1/ .bitWidth // expected-error {{value of type 'Regex' has no member 'bitWidth'}} + +_ = 2 +/ 1 / + .bitWidth + +_ = 2 +/1 / + .bitWidth // expected-error {{value of type 'Regex' has no member 'bitWidth'}} + +let z = +/y/ + +// While '.' is technically an operator character, it seems more likely that +// the user hasn't written the member name yet. +_ = 0. / 1 / 2 // expected-error {{expected member name following '.'}} +_ = 0 . / 1 / 2 // expected-error {{expected member name following '.'}} + +switch "" { +case /x/: + // expected-error@-1 {{expression pattern of type 'Regex' cannot match values of type 'String'}} + // expected-note@-2 {{overloads for '~=' exist with these partially matching parameter lists: (Substring, String)}} + break +case _ where /x /: + // expected-error@-1 {{cannot convert value of type 'Regex' to expected condition type 'Bool'}} + break +default: + break +} + +do {} catch /x / {} +// expected-error@-1 {{expression pattern of type 'Regex' cannot match values of type 'any Error'}} +// expected-error@-2 {{binary operator '~=' cannot be applied to two 'any Error' operands}} +// expected-warning@-3 {{'catch' block is unreachable because no errors are thrown in 'do' block}} + +switch /x / { +default: + break +} + +if /x / {} // expected-error {{cannot convert value of type 'Regex' to expected condition type 'Bool'}} +if /x /.smth {} // expected-error {{value of type 'Regex' has no member 'smth'}} + +func testGuard() { + guard /x/ else { return } // expected-error {{cannot convert value of type 'Regex' to expected condition type 'Bool'}} +} + +for x in [0] where /x/ {} // expected-error {{cannot convert value of type 'Regex' to expected condition type 'Bool'}} + +typealias Magic = T +_ = /x/ as Magic +_ = /x/ as! String // expected-warning {{cast from 'Regex' to unrelated type 'String' always fails}} + +_ = type(of: /x /) + +do { + let /x / // expected-error {{expected pattern}} +} + +_ = try /x/; _ = try /x / +// expected-warning@-1 2{{no calls to throwing functions occur within 'try' expression}} + +// TODO: `try?` and `try!` are currently broken. +// _ = try? /x/; _ = try? / x / +// _ = try! /x/; _ = try! / x / + +_ = await /x / // expected-warning {{no 'async' operations occur within 'await' expression}} + +/x/ = 0 // expected-error {{cannot assign to value: literals are not mutable}} +/x/() // expected-error {{cannot call value of non-function type 'Regex'}} + +// TODO: We could allow this (and treat the last '/' as postfix), though it +// seems more likely the user has written a comment and is still in the middle +// of writing the characters before it. +/x// +// expected-error@-1 {{prefix slash not allowed}} +// expected-error@-2 {{'/' is not a prefix unary operator}} + +/x // +// expected-error@-1 {{prefix slash not allowed}} +// expected-error@-2 {{'/' is not a prefix unary operator}} + +/x/**/ +// expected-error@-1 {{prefix slash not allowed}} +// expected-error@-2 {{'/' is not a prefix unary operator}} + +// These become regex literals, unless surrounded in parens. +func baz(_ x: (Int, Int) -> Int, _ y: (Int, Int) -> Int) {} // expected-note 2{{'baz' declared here}} +baz(/, /) +// expected-error@-1 {{cannot convert value of type 'Regex' to expected argument type '(Int, Int) -> Int'}} +// expected-error@-2 {{missing argument for parameter #2 in call}} +baz(/,/) +// expected-error@-1 {{cannot convert value of type 'Regex' to expected argument type '(Int, Int) -> Int'}} +// expected-error@-2 {{missing argument for parameter #2 in call}} +baz((/), /) + +func qux(_ x: (Int, Int) -> Int, _ y: T) -> Int { 0 } +do { + _ = qux(/, 1) / 2 + // expected-error@-1 {{cannot parse regular expression: closing ')' does not balance any groups openings}} + // expected-error@-2 {{expected ',' separator}} +} +do { + _ = qux(/, "(") / 2 + // expected-error@-1 {{cannot convert value of type 'Regex<(Substring, Substring)>' to expected argument type '(Int, Int) -> Int'}} + // expected-error@-2 {{expected ',' separator}} +} +_ = qux(/, 1) // this comment tests to make sure we don't try and end the regex on the starting '/' of '//'. + +let arr: [Double] = [2, 3, 4] +_ = arr.reduce(1, /) / 3 +_ = arr.reduce(1, /) + arr.reduce(1, /) + +// Fine. +_ = /./ + +// You need to escape if you want a regex literal to start with these characters. +// TODO: Better recovery +_ = /\ / +do { _ = / / } +// expected-error@-1 2{{unary operator cannot be separated from its operand}} +// expected-error@-2 {{expected expression in assignment}} + +_ = /\)/ +do { _ = /)/ } +// expected-error@-1 {{expected expression after unary operator}} +// expected-error@-2 {{expected expression in assignment}} + +_ = /,/ +_ = /}/ +_ = /]/ +_ = /:/ +_ = /;/ + +// Don't emit diagnostics here, as we re-lex. +_ = /0xG/ +_ = /0oG/ +_ = /"/ +_ = /'/ +_ = /<#placeholder#>/ diff --git a/unittests/Parse/LexerTests.cpp b/unittests/Parse/LexerTests.cpp index b0bc6b7a15936..28ebb111be794 100644 --- a/unittests/Parse/LexerTests.cpp +++ b/unittests/Parse/LexerTests.cpp @@ -804,6 +804,9 @@ TEST_F(LexerTest, DiagnoseEmbeddedNul) { LexerMode::Swift, HashbangMode::Disallowed, CommentRetentionMode::None, TriviaRetentionMode::WithTrivia); + Token Tok; + L.lex(Tok); + ASSERT_TRUE(containsPrefix(DiagConsumer.messages, "1, 2: nul character embedded in middle of file")); ASSERT_TRUE(containsPrefix(DiagConsumer.messages,