From 07b3c765cd26044907b9a1ec44d786011b514681 Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Tue, 12 Apr 2022 16:03:46 +0100 Subject: [PATCH 1/6] [DiagnosticEngine] Introduce DiagnosticQueue This allows us to hold tentative diagnostics independent of other diagnostic transactions. --- include/swift/AST/DiagnosticEngine.h | 83 ++++++++++++++++++++++++++++ lib/AST/DiagnosticEngine.cpp | 30 +++++++--- 2 files changed, 106 insertions(+), 7 deletions(-) diff --git a/include/swift/AST/DiagnosticEngine.h b/include/swift/AST/DiagnosticEngine.h index 0a797dadcb339..a7bd60d672451 100644 --- a/include/swift/AST/DiagnosticEngine.h +++ b/include/swift/AST/DiagnosticEngine.h @@ -848,6 +848,7 @@ namespace swift { friend class DiagnosticTransaction; friend class CompoundDiagnosticTransaction; friend class DiagnosticStateRAII; + friend class DiagnosticQueue; public: explicit DiagnosticEngine(SourceManager &SourceMgr) @@ -1137,10 +1138,20 @@ namespace swift { /// Send \c diag to all diagnostic consumers. void emitDiagnostic(const Diagnostic &diag); + /// Handle a new diagnostic, which will either be emitted, or added to an + /// active transaction. + void handleDiagnostic(Diagnostic &&diag); + + /// Clear any tentative diagnostics. + void clearTentativeDiagnostics(); + /// Send all tentative diagnostics to all diagnostic consumers and /// delete them. void emitTentativeDiagnostics(); + /// Forward all tentative diagnostics to a different diagnostic engine. + void forwardTentativeDiagnosticsTo(DiagnosticEngine &targetEngine); + public: DiagnosticKind declaredDiagnosticKindFor(const DiagID id); @@ -1333,6 +1344,78 @@ namespace swift { } }; + /// Represents a queue of diagnostics that have their emission delayed until + /// the queue is destroyed. 
This is similar to DiagnosticTransaction, but + /// with a few key differences: + /// + /// - The queue maintains its own diagnostic engine (which may be accessed + /// through `getDiags()`), and diagnostics must be specifically emitted + /// using that engine to be enqueued. + /// - It allows for non-LIFO transactions, as each queue operates + /// independently. + /// - A queue can be drained multiple times without having to be recreated + /// (unlike DiagnosticTransaction, it has no concept of "closing"). + /// + /// Note you may add DiagnosticTransactions to the queue's diagnostic engine, + /// but they must be closed before attempting to clear or emit the diagnostics + /// in the queue. + /// + class DiagnosticQueue final { + /// The underlying diagnostic engine that the diagnostics will be emitted + /// by. + DiagnosticEngine &UnderlyingEngine; + + /// A temporary engine used to queue diagnostics. + DiagnosticEngine QueueEngine; + + /// Whether the queued diagnostics should be emitted on the destruction of + /// the queue, or whether they should be cleared. + bool EmitOnDestruction; + + public: + DiagnosticQueue(const DiagnosticQueue &) = delete; + DiagnosticQueue &operator=(const DiagnosticQueue &) = delete; + + /// Create a new diagnostic queue with a given engine to forward the + /// diagnostics to. + explicit DiagnosticQueue(DiagnosticEngine &engine, bool emitOnDestruction) + : UnderlyingEngine(engine), QueueEngine(engine.SourceMgr), + EmitOnDestruction(emitOnDestruction) { + // Open a transaction to avoid emitting any diagnostics for the temporary + // engine. + QueueEngine.TransactionCount++; + } + + /// Retrieve the engine which may be used to enqueue diagnostics. + DiagnosticEngine &getDiags() { return QueueEngine; } + + /// Retrieve the underlying engine which will receive the diagnostics. + DiagnosticEngine &getUnderlyingDiags() { return UnderlyingEngine; } + + /// Clear this queue and erase all diagnostics recorded. 
+ void clear() { + assert(QueueEngine.TransactionCount == 1 && + "Must close outstanding DiagnosticTransactions before draining"); + QueueEngine.clearTentativeDiagnostics(); + } + + /// Emit all the diagnostics recorded by this queue. + void emit() { + assert(QueueEngine.TransactionCount == 1 && + "Must close outstanding DiagnosticTransactions before draining"); + QueueEngine.forwardTentativeDiagnosticsTo(UnderlyingEngine); + } + + ~DiagnosticQueue() { + if (EmitOnDestruction) { + emit(); + } else { + clear(); + } + QueueEngine.TransactionCount--; + } + }; + inline void DiagnosticEngine::diagnoseWithNotes(InFlightDiagnostic parentDiag, llvm::function_ref builder) { diff --git a/lib/AST/DiagnosticEngine.cpp b/lib/AST/DiagnosticEngine.cpp index 8eb534888a140..a39892c0b2816 100644 --- a/lib/AST/DiagnosticEngine.cpp +++ b/lib/AST/DiagnosticEngine.cpp @@ -1040,24 +1040,40 @@ DiagnosticBehavior DiagnosticState::determineBehavior(const Diagnostic &diag) { void DiagnosticEngine::flushActiveDiagnostic() { assert(ActiveDiagnostic && "No active diagnostic to flush"); + handleDiagnostic(std::move(*ActiveDiagnostic)); + ActiveDiagnostic.reset(); +} + +void DiagnosticEngine::handleDiagnostic(Diagnostic &&diag) { if (TransactionCount == 0) { - emitDiagnostic(*ActiveDiagnostic); + emitDiagnostic(diag); WrappedDiagnostics.clear(); WrappedDiagnosticArgs.clear(); } else { - onTentativeDiagnosticFlush(*ActiveDiagnostic); - TentativeDiagnostics.emplace_back(std::move(*ActiveDiagnostic)); + onTentativeDiagnosticFlush(diag); + TentativeDiagnostics.emplace_back(std::move(diag)); } - ActiveDiagnostic.reset(); +} + +void DiagnosticEngine::clearTentativeDiagnostics() { + TentativeDiagnostics.clear(); + WrappedDiagnostics.clear(); + WrappedDiagnosticArgs.clear(); } void DiagnosticEngine::emitTentativeDiagnostics() { for (auto &diag : TentativeDiagnostics) { emitDiagnostic(diag); } - TentativeDiagnostics.clear(); - WrappedDiagnostics.clear(); - WrappedDiagnosticArgs.clear(); + 
clearTentativeDiagnostics(); +} + +void DiagnosticEngine::forwardTentativeDiagnosticsTo( + DiagnosticEngine &targetEngine) { + for (auto &diag : TentativeDiagnostics) { + targetEngine.handleDiagnostic(std::move(diag)); + } + clearTentativeDiagnostics(); } /// Returns the access level of the least accessible PrettyPrintedDeclarations From 080d59b3dfbdcf1cd5f2c299e9c432de7e04f90a Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Tue, 12 Apr 2022 16:03:47 +0100 Subject: [PATCH 2/6] [Lexer] Delay token diagnostics Queue up diagnostics when lexing, waiting until `Lexer::lex` is called before emitting them. This allows us to re-lex without having to deal with previously invalid tokens. --- include/swift/Parse/Lexer.h | 32 ++++++++++++++++++----- lib/Parse/Lexer.cpp | 47 +++++++++++++++++++++------------- unittests/Parse/LexerTests.cpp | 3 +++ 3 files changed, 58 insertions(+), 24 deletions(-) diff --git a/include/swift/Parse/Lexer.h b/include/swift/Parse/Lexer.h index ceda74e5a630d..a85dfe66c4823 100644 --- a/include/swift/Parse/Lexer.h +++ b/include/swift/Parse/Lexer.h @@ -75,7 +75,10 @@ class Lexer { const LangOptions &LangOpts; const SourceManager &SourceMgr; const unsigned BufferID; - DiagnosticEngine *Diags; + + /// A queue of diagnostics to emit when a token is consumed. We want to queue + /// them, as the parser may backtrack and re-lex a token. + Optional DiagQueue; using State = LexerState; @@ -154,6 +157,19 @@ class Lexer { void initialize(unsigned Offset, unsigned EndOffset); + /// Retrieve the diagnostic engine for emitting diagnostics for the current + /// token. + DiagnosticEngine *getTokenDiags() { + return DiagQueue ? &DiagQueue->getDiags() : nullptr; + } + + /// Retrieve the underlying diagnostic engine we emit diagnostics to. Note + /// this should only be used for diagnostics not concerned with the current + /// token. + DiagnosticEngine *getUnderlyingDiags() { + return DiagQueue ? 
&DiagQueue->getUnderlyingDiags() : nullptr; + } + public: /// Create a normal lexer that scans the whole source buffer. /// @@ -209,6 +225,10 @@ class Lexer { LeadingTriviaResult = LeadingTrivia; TrailingTriviaResult = TrailingTrivia; } + // Emit any diagnostics recorded for this token. + if (DiagQueue) + DiagQueue->emit(); + if (Result.isNot(tok::eof)) lexImpl(); } @@ -298,12 +318,12 @@ class Lexer { void restoreState(State S, bool enableDiagnostics = false) { assert(S.isValid()); CurPtr = getBufferPtrForSourceLoc(S.Loc); - // Don't reemit diagnostics while readvancing the lexer. - llvm::SaveAndRestore - D(Diags, enableDiagnostics ? Diags : nullptr); - lexImpl(); + // Don't re-emit diagnostics from readvancing the lexer. + if (DiagQueue && !enableDiagnostics) + DiagQueue->clear(); + // Restore Trivia. if (TriviaRetention == TriviaRetentionMode::WithTrivia) LeadingTrivia = S.LeadingTrivia; @@ -505,7 +525,7 @@ class Lexer { void getStringLiteralSegments(const Token &Str, SmallVectorImpl &Segments) { - return getStringLiteralSegments(Str, Segments, Diags); + return getStringLiteralSegments(Str, Segments, getTokenDiags()); } static SourceLoc getSourceLoc(const char *Loc) { diff --git a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp index 0c0bf0f7be728..662c6ecaca145 100644 --- a/lib/Parse/Lexer.cpp +++ b/lib/Parse/Lexer.cpp @@ -183,9 +183,12 @@ Lexer::Lexer(const PrincipalTag &, const LangOptions &LangOpts, HashbangMode HashbangAllowed, CommentRetentionMode RetainComments, TriviaRetentionMode TriviaRetention) : LangOpts(LangOpts), SourceMgr(SourceMgr), BufferID(BufferID), - Diags(Diags), LexMode(LexMode), + LexMode(LexMode), IsHashbangAllowed(HashbangAllowed == HashbangMode::Allowed), - RetainComments(RetainComments), TriviaRetention(TriviaRetention) {} + RetainComments(RetainComments), TriviaRetention(TriviaRetention) { + if (Diags) + DiagQueue.emplace(*Diags, /*emitOnDestruction*/ false); +} void Lexer::initialize(unsigned Offset, unsigned EndOffset) { assert(Offset <= 
EndOffset); @@ -245,7 +248,7 @@ Lexer::Lexer(const LangOptions &Options, const SourceManager &SourceMgr, Lexer::Lexer(Lexer &Parent, State BeginState, State EndState) : Lexer(PrincipalTag(), Parent.LangOpts, Parent.SourceMgr, Parent.BufferID, - Parent.Diags, Parent.LexMode, + Parent.getUnderlyingDiags(), Parent.LexMode, Parent.IsHashbangAllowed ? HashbangMode::Allowed : HashbangMode::Disallowed, @@ -261,7 +264,7 @@ Lexer::Lexer(Lexer &Parent, State BeginState, State EndState) } InFlightDiagnostic Lexer::diagnose(const char *Loc, Diagnostic Diag) { - if (Diags) + if (auto *Diags = getTokenDiags()) return Diags->diagnose(getSourceLoc(Loc), Diag); return InFlightDiagnostic(); @@ -272,7 +275,7 @@ Token Lexer::getTokenAt(SourceLoc Loc) { SourceMgr.findBufferContainingLoc(Loc)) && "location from the wrong buffer"); - Lexer L(LangOpts, SourceMgr, BufferID, Diags, LexMode, + Lexer L(LangOpts, SourceMgr, BufferID, getUnderlyingDiags(), LexMode, HashbangMode::Allowed, CommentRetentionMode::None, TriviaRetentionMode::WithoutTrivia); L.restoreState(State(Loc)); @@ -330,6 +333,7 @@ void Lexer::formStringLiteralToken(const char *TokStart, return; NextToken.setStringLiteral(IsMultilineString, CustomDelimiterLen); + auto *Diags = getTokenDiags(); if (IsMultilineString && Diags) validateMultilineIndents(NextToken, Diags); } @@ -416,7 +420,8 @@ static bool advanceToEndOfLine(const char *&CurPtr, const char *BufferEnd, } void Lexer::skipToEndOfLine(bool EatNewline) { - bool isEOL = advanceToEndOfLine(CurPtr, BufferEnd, CodeCompletionPtr, Diags); + bool isEOL = + advanceToEndOfLine(CurPtr, BufferEnd, CodeCompletionPtr, getTokenDiags()); if (EatNewline && isEOL) { ++CurPtr; NextToken.setAtStartOfLine(true); @@ -514,8 +519,8 @@ static bool skipToEndOfSlashStarComment(const char *&CurPtr, /// skipSlashStarComment - /**/ comments are skipped (treated as whitespace). /// Note that (unlike in C) block comments can be nested. 
void Lexer::skipSlashStarComment() { - bool isMultiline = - skipToEndOfSlashStarComment(CurPtr, BufferEnd, CodeCompletionPtr, Diags); + bool isMultiline = skipToEndOfSlashStarComment( + CurPtr, BufferEnd, CodeCompletionPtr, getTokenDiags()); if (isMultiline) NextToken.setAtStartOfLine(true); } @@ -1360,7 +1365,7 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote, if (!IsMultilineString && !CustomDelimiterLen) return ~0U; - DiagnosticEngine *D = EmitDiagnostics ? Diags : nullptr; + DiagnosticEngine *D = EmitDiagnostics ? getTokenDiags() : nullptr; auto TmpPtr = CurPtr; if (IsMultilineString && !advanceIfMultilineDelimiter(CustomDelimiterLen, TmpPtr, D)) @@ -1385,7 +1390,7 @@ unsigned Lexer::lexCharacter(const char *&CurPtr, char StopQuote, return CurPtr[-1]; case '\\': // Escapes. if (!delimiterMatches(CustomDelimiterLen, CurPtr, - EmitDiagnostics ? Diags : nullptr)) + EmitDiagnostics ? getTokenDiags() : nullptr)) return '\\'; break; } @@ -1799,7 +1804,7 @@ static void validateMultilineIndents(const Token &Str, void Lexer::diagnoseSingleQuoteStringLiteral(const char *TokStart, const char *TokEnd) { assert(*TokStart == '\'' && TokEnd[-1] == '\''); - if (!Diags) // or assert? + if (!getTokenDiags()) // or assert? return; auto startLoc = Lexer::getSourceLoc(TokStart); @@ -1836,7 +1841,7 @@ void Lexer::diagnoseSingleQuoteStringLiteral(const char *TokStart, replacement.append(OutputPtr, Ptr - 1); replacement.push_back('"'); - Diags->diagnose(startLoc, diag::lex_single_quote_string) + getTokenDiags()->diagnose(startLoc, diag::lex_single_quote_string) .fixItReplaceChars(startLoc, endLoc, replacement); } @@ -1852,8 +1857,8 @@ void Lexer::lexStringLiteral(unsigned CustomDelimiterLen) { // diagnostics about changing them to double quotes. 
assert((QuoteChar == '"' || QuoteChar == '\'') && "Unexpected start"); - bool IsMultilineString = advanceIfMultilineDelimiter(CustomDelimiterLen, - CurPtr, Diags, true); + bool IsMultilineString = advanceIfMultilineDelimiter( + CustomDelimiterLen, CurPtr, getTokenDiags(), true); if (IsMultilineString && *CurPtr != '\n' && *CurPtr != '\r') diagnose(CurPtr, diag::lex_illegal_multiline_string_start) .fixItInsert(Lexer::getSourceLoc(CurPtr), "\n"); @@ -2380,6 +2385,11 @@ void Lexer::lexImpl() { assert(CurPtr >= BufferStart && CurPtr <= BufferEnd && "Current pointer out of range!"); + // If we're re-lexing, clear out any previous diagnostics that weren't + // emitted. + if (DiagQueue) + DiagQueue->clear(); + const char *LeadingTriviaStart = CurPtr; if (CurPtr == BufferStart) { if (BufferStart < ContentStart) { @@ -2467,8 +2477,9 @@ void Lexer::lexImpl() { case ':': return formToken(tok::colon, TokStart); case '\\': return formToken(tok::backslash, TokStart); - case '#': + case '#': { // Try lex a raw string literal. + auto *Diags = getTokenDiags(); if (unsigned CustomDelimiterLen = advanceIfCustomDelimiter(CurPtr, Diags)) return lexStringLiteral(CustomDelimiterLen); @@ -2479,8 +2490,8 @@ void Lexer::lexImpl() { // Otherwise try lex a magic pound literal. return lexHash(); - - // Operator characters. + } + // Operator characters. 
case '/': if (CurPtr[0] == '/') { // "//" skipSlashSlashComment(/*EatNewline=*/true); @@ -2656,7 +2667,7 @@ StringRef Lexer::lexTrivia(bool IsForTrailingTrivia, case 0: switch (getNulCharacterKind(CurPtr - 1)) { case NulCharacterKind::Embedded: { - diagnoseEmbeddedNul(Diags, CurPtr - 1); + diagnoseEmbeddedNul(getTokenDiags(), CurPtr - 1); goto Restart; } case NulCharacterKind::CodeCompletion: diff --git a/unittests/Parse/LexerTests.cpp b/unittests/Parse/LexerTests.cpp index b0bc6b7a15936..28ebb111be794 100644 --- a/unittests/Parse/LexerTests.cpp +++ b/unittests/Parse/LexerTests.cpp @@ -804,6 +804,9 @@ TEST_F(LexerTest, DiagnoseEmbeddedNul) { LexerMode::Swift, HashbangMode::Disallowed, CommentRetentionMode::None, TriviaRetentionMode::WithTrivia); + Token Tok; + L.lex(Tok); + ASSERT_TRUE(containsPrefix(DiagConsumer.messages, "1, 2: nul character embedded in middle of file")); ASSERT_TRUE(containsPrefix(DiagConsumer.messages, From 63b8db1659b96da7e83a7156aab40f891bd5d1fb Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Tue, 12 Apr 2022 16:03:47 +0100 Subject: [PATCH 3/6] Start using '-enable-bare-slash-regex' Change the flag to imply '-enable-experimental-string-processing', and update some tests to start using it. 
--- include/swift/Basic/LangOptions.h | 5 +++-- lib/Frontend/CompilerInvocation.cpp | 10 +++++++--- test/IDE/complete_regex.swift | 2 +- test/SourceKit/Sema/sema_regex.swift | 2 +- test/StringProcessing/Parse/regex.swift | 2 +- .../Parse/regex_parse_end_of_buffer.swift | 2 +- test/StringProcessing/Parse/regex_parse_error.swift | 2 +- test/StringProcessing/Runtime/regex_basic.swift | 2 +- .../SILGen/regex_literal_silgen.swift | 2 +- .../Sema/regex_literal_type_inference.swift | 2 +- tools/swift-ide-test/swift-ide-test.cpp | 11 ++++++++++- 11 files changed, 28 insertions(+), 14 deletions(-) diff --git a/include/swift/Basic/LangOptions.h b/include/swift/Basic/LangOptions.h index 464841d58fbf9..b14e8a6c4e994 100644 --- a/include/swift/Basic/LangOptions.h +++ b/include/swift/Basic/LangOptions.h @@ -564,8 +564,9 @@ namespace swift { /// Enables dumping type witness systems from associated type inference. bool DumpTypeWitnessSystems = false; - /// Enables `/.../` syntax regular-expression literals - bool EnableForwardSlashRegexLiterals = false; + /// Enables `/.../` syntax regular-expression literals. This requires + /// experimental string processing. Note this does not affect `#/.../#`. + bool EnableBareSlashRegexLiterals = false; /// Sets the target we are building for and updates platform conditions /// to match. diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp index d0fa68dac9323..ca67344d14ca5 100644 --- a/lib/Frontend/CompilerInvocation.cpp +++ b/lib/Frontend/CompilerInvocation.cpp @@ -504,6 +504,13 @@ static bool ParseLangArgs(LangOptions &Opts, ArgList &Args, Opts.EnableExperimentalStringProcessing |= Args.hasArg(OPT_enable_experimental_string_processing); + // Whether '/.../' regex literals are enabled. This implies experimental + // string processing. 
+ if (Args.hasArg(OPT_enable_bare_slash_regex)) { + Opts.EnableBareSlashRegexLiterals = true; + Opts.EnableExperimentalStringProcessing = true; + } + Opts.EnableExperimentalBoundGenericExtensions |= Args.hasArg(OPT_enable_experimental_bound_generic_extensions); @@ -1010,9 +1017,6 @@ static bool ParseLangArgs(LangOptions &Opts, ArgList &Args, if (Args.hasArg(OPT_disable_requirement_machine_reuse)) Opts.EnableRequirementMachineReuse = false; - if (Args.hasArg(OPT_enable_bare_slash_regex)) - Opts.EnableForwardSlashRegexLiterals = true; - if (Args.hasArg(OPT_enable_requirement_machine_opaque_archetypes)) Opts.EnableRequirementMachineOpaqueArchetypes = true; diff --git a/test/IDE/complete_regex.swift b/test/IDE/complete_regex.swift index b34c25ce9daf6..7ad3cc02d8ddd 100644 --- a/test/IDE/complete_regex.swift +++ b/test/IDE/complete_regex.swift @@ -1,7 +1,7 @@ // REQUIRES: swift_in_compiler // RUN: %empty-directory(%t) -// RUN: %target-swift-ide-test -enable-experimental-string-processing -batch-code-completion -source-filename %s -filecheck %raw-FileCheck -completion-output-dir %t +// RUN: %target-swift-ide-test -enable-bare-slash-regex -batch-code-completion -source-filename %s -filecheck %raw-FileCheck -completion-output-dir %t func testLiteral() { #/foo/#.#^RE_LITERAL_MEMBER^# diff --git a/test/SourceKit/Sema/sema_regex.swift b/test/SourceKit/Sema/sema_regex.swift index 78d50d1658f11..bb1b421d8f3e1 100644 --- a/test/SourceKit/Sema/sema_regex.swift +++ b/test/SourceKit/Sema/sema_regex.swift @@ -3,7 +3,7 @@ public func retRegex() -> Regex { } // REQUIRES: swift_in_compiler -// RUN: %sourcekitd-test -req=sema %s -- %s -Xfrontend -enable-experimental-string-processing | %FileCheck %s +// RUN: %sourcekitd-test -req=sema %s -- %s -Xfrontend -enable-bare-slash-regex | %FileCheck %s // CHECK: [ // CHECK: { diff --git a/test/StringProcessing/Parse/regex.swift b/test/StringProcessing/Parse/regex.swift index 3cae286dfe42c..b6fb31d647f15 100644 --- 
a/test/StringProcessing/Parse/regex.swift +++ b/test/StringProcessing/Parse/regex.swift @@ -1,4 +1,4 @@ -// RUN: %target-typecheck-verify-swift -enable-experimental-string-processing +// RUN: %target-typecheck-verify-swift -enable-bare-slash-regex // REQUIRES: swift_in_compiler _ = #/abc/# diff --git a/test/StringProcessing/Parse/regex_parse_end_of_buffer.swift b/test/StringProcessing/Parse/regex_parse_end_of_buffer.swift index e1cad9e7ae949..6c8d0d83ad7bf 100644 --- a/test/StringProcessing/Parse/regex_parse_end_of_buffer.swift +++ b/test/StringProcessing/Parse/regex_parse_end_of_buffer.swift @@ -1,4 +1,4 @@ -// RUN: %target-typecheck-verify-swift -enable-experimental-string-processing +// RUN: %target-typecheck-verify-swift -enable-bare-slash-regex // REQUIRES: swift_in_compiler // Note there is purposefully no trailing newline here. diff --git a/test/StringProcessing/Parse/regex_parse_error.swift b/test/StringProcessing/Parse/regex_parse_error.swift index 5929028a9ef56..f91e8cb5013cd 100644 --- a/test/StringProcessing/Parse/regex_parse_error.swift +++ b/test/StringProcessing/Parse/regex_parse_error.swift @@ -1,4 +1,4 @@ -// RUN: %target-typecheck-verify-swift -enable-experimental-string-processing +// RUN: %target-typecheck-verify-swift -enable-bare-slash-regex // REQUIRES: swift_in_compiler _ = #/(/# // expected-error {{expected ')'}} diff --git a/test/StringProcessing/Runtime/regex_basic.swift b/test/StringProcessing/Runtime/regex_basic.swift index 7e957e0379ca0..065159bbbea3d 100644 --- a/test/StringProcessing/Runtime/regex_basic.swift +++ b/test/StringProcessing/Runtime/regex_basic.swift @@ -1,4 +1,4 @@ -// RUN: %target-run-simple-swift(-Xfrontend -enable-experimental-string-processing) +// RUN: %target-run-simple-swift(-Xfrontend -enable-bare-slash-regex) // REQUIRES: swift_in_compiler,string_processing,executable_test diff --git a/test/StringProcessing/SILGen/regex_literal_silgen.swift b/test/StringProcessing/SILGen/regex_literal_silgen.swift index 
27fb357efeb36..4dd57786da2e7 100644 --- a/test/StringProcessing/SILGen/regex_literal_silgen.swift +++ b/test/StringProcessing/SILGen/regex_literal_silgen.swift @@ -1,4 +1,4 @@ -// RUN: %target-swift-frontend -emit-silgen -enable-experimental-string-processing %s | %FileCheck %s +// RUN: %target-swift-frontend -emit-silgen -enable-bare-slash-regex %s | %FileCheck %s // REQUIRES: swift_in_compiler var s = #/abc/# diff --git a/test/StringProcessing/Sema/regex_literal_type_inference.swift b/test/StringProcessing/Sema/regex_literal_type_inference.swift index ceda3794d3a94..02387f7e814cb 100644 --- a/test/StringProcessing/Sema/regex_literal_type_inference.swift +++ b/test/StringProcessing/Sema/regex_literal_type_inference.swift @@ -1,4 +1,4 @@ -// RUN: %target-typecheck-verify-swift -enable-experimental-string-processing +// RUN: %target-typecheck-verify-swift -enable-bare-slash-regex // REQUIRES: swift_in_compiler let r0 = #/./# diff --git a/tools/swift-ide-test/swift-ide-test.cpp b/tools/swift-ide-test/swift-ide-test.cpp index 05d5e59c12ad8..485d9ab43702f 100644 --- a/tools/swift-ide-test/swift-ide-test.cpp +++ b/tools/swift-ide-test/swift-ide-test.cpp @@ -815,6 +815,11 @@ static llvm::cl::opt EnableExperimentalStringProcessing( llvm::cl::desc("Enable experimental string processing"), llvm::cl::init(false)); +static llvm::cl::opt EnableBareSlashRegexLiterals( + "enable-bare-slash-regex", + llvm::cl::desc("Enable the ability to write '/.../' regex literals"), + llvm::cl::init(false)); + static llvm::cl::list AccessNotesPath("access-notes-path", llvm::cl::desc("Path to access notes file"), llvm::cl::cat(Category)); @@ -4288,7 +4293,11 @@ int main(int argc, char *argv[]) { InitInvok.getLangOptions().EnableExperimentalNamedOpaqueTypes = true; } if (options::EnableExperimentalStringProcessing) { - InitInvok.getLangOptions().EnableExperimentalStringProcessing= true; + InitInvok.getLangOptions().EnableExperimentalStringProcessing = true; + } + if 
(options::EnableBareSlashRegexLiterals) { + InitInvok.getLangOptions().EnableBareSlashRegexLiterals = true; + InitInvok.getLangOptions().EnableExperimentalStringProcessing = true; } if (!options::Triple.empty()) From 5a8dff0a769165da4bb29ebe86e3062ce975621e Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Tue, 12 Apr 2022 16:03:48 +0100 Subject: [PATCH 4/6] [Parse] Emit error on prefix operator containing `/` When forward slash regex is enabled, start emitting an error on prefix operators containing the `/` character. --- include/swift/AST/DiagnosticsParse.def | 3 +++ lib/Parse/ParseDecl.cpp | 7 +++++++ test/StringProcessing/Parse/forward-slash-regex.swift | 5 +++++ 3 files changed, 15 insertions(+) create mode 100644 test/StringProcessing/Parse/forward-slash-regex.swift diff --git a/include/swift/AST/DiagnosticsParse.def b/include/swift/AST/DiagnosticsParse.def index 6cd6adb1845a0..6e4537253260c 100644 --- a/include/swift/AST/DiagnosticsParse.def +++ b/include/swift/AST/DiagnosticsParse.def @@ -94,6 +94,9 @@ ERROR(forbidden_extended_escaping_string,none, ERROR(regex_literal_parsing_error,none, "%0", (StringRef)) +ERROR(prefix_slash_not_allowed,none, + "prefix operator may not contain '/'", ()) + //------------------------------------------------------------------------------ // MARK: Lexer diagnostics //------------------------------------------------------------------------------ diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp index 4a1a41a05100f..e8c43af987100 100644 --- a/lib/Parse/ParseDecl.cpp +++ b/lib/Parse/ParseDecl.cpp @@ -8528,6 +8528,13 @@ Parser::parseDeclOperator(ParseDeclOptions Flags, DeclAttributes &Attributes) { Tok.getRawText().front() == '!')) diagnose(Tok, diag::postfix_operator_name_cannot_start_with_unwrap); + // Prefix operators may not contain the `/` character when `/.../` regex + // literals are enabled. 
+ if (Context.LangOpts.EnableBareSlashRegexLiterals) { + if (Attributes.hasAttribute() && Tok.getText().contains("/")) + diagnose(Tok, diag::prefix_slash_not_allowed); + } + // A common error is to try to define an operator with something in the // unicode plane considered to be an operator, or to try to define an // operator like "not". Analyze and diagnose this specifically. diff --git a/test/StringProcessing/Parse/forward-slash-regex.swift b/test/StringProcessing/Parse/forward-slash-regex.swift new file mode 100644 index 0000000000000..5c41aae4c7be1 --- /dev/null +++ b/test/StringProcessing/Parse/forward-slash-regex.swift @@ -0,0 +1,5 @@ +// RUN: %target-typecheck-verify-swift -enable-bare-slash-regex + +prefix operator / // expected-error {{prefix operator may not contain '/'}} +prefix operator ^/ // expected-error {{prefix operator may not contain '/'}} +prefix operator /^/ // expected-error {{prefix operator may not contain '/'}} From 9f384d393fa351fcd7c01b6537e54d1a744d6215 Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Tue, 12 Apr 2022 16:03:48 +0100 Subject: [PATCH 5/6] [Lexer] Remove `r'...'` lexing logic This spelling is no longer used. --- lib/Parse/Lexer.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp index 662c6ecaca145..b2e7561d5ee7e 100644 --- a/lib/Parse/Lexer.cpp +++ b/lib/Parse/Lexer.cpp @@ -2541,20 +2541,13 @@ void Lexer::lexImpl() { case '&': case '|': case '^': case '~': case '.': return lexOperatorIdentifier(); - case 'r': - // If we have experimental string processing enabled, try lex a regex - // literal. 
- if (tryLexRegexLiteral(TokStart)) - return; - LLVM_FALLTHROUGH; - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': - case 'o': case 'p': case 'q': /*r above*/ case 's': case 't': case 'u': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case '_': return lexIdentifier(); From f1a799037e3d00ea530480a1d67c281524a29a42 Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Tue, 12 Apr 2022 16:03:49 +0100 Subject: [PATCH 6/6] [Parse] Introduce `/.../` regex literals Start parsing regex literals with `/.../` delimiters. rdar://83253726 --- include/swift/AST/DiagnosticsParse.def | 5 +- include/swift/Parse/Lexer.h | 23 +- include/swift/Parse/Parser.h | 14 + lib/Parse/Lexer.cpp | 115 +++++++- lib/Parse/ParseExpr.cpp | 74 ++++- lib/Parse/Parser.cpp | 11 +- test/IDE/complete_regex.swift | 2 +- test/SourceKit/Sema/sema_regex.swift | 2 +- .../Parse/forward-slash-regex.swift | 278 ++++++++++++++++++ test/StringProcessing/Parse/regex.swift | 7 +- .../Parse/regex_parse_error.swift | 22 +- 11 files changed, 522 insertions(+), 31 deletions(-) diff --git a/include/swift/AST/DiagnosticsParse.def b/include/swift/AST/DiagnosticsParse.def index 6e4537253260c..e1493f59fff94 100644 --- a/include/swift/AST/DiagnosticsParse.def +++ b/include/swift/AST/DiagnosticsParse.def @@ -143,7 +143,10 @@ ERROR(lex_invalid_escape_delimiter,none, ERROR(lex_invalid_closing_delimiter,none, "too many '#' characters in closing delimiter", ()) -ERROR(lex_unterminated_regex,none, +ERROR(lex_regex_literal_invalid_starting_char,none, + "regex literal may not start with %0; add backslash to escape", 
+ (StringRef)) +ERROR(lex_regex_literal_unterminated,none, "unterminated regex literal", ()) ERROR(lex_invalid_unicode_scalar,none, diff --git a/include/swift/Parse/Lexer.h b/include/swift/Parse/Lexer.h index a85dfe66c4823..858eecd82a472 100644 --- a/include/swift/Parse/Lexer.h +++ b/include/swift/Parse/Lexer.h @@ -61,6 +61,16 @@ enum class LexerMode { SIL }; +/// Whether or not the lexer should attempt to lex a `/.../` regex literal. +enum class LexerForwardSlashRegexMode { + /// No `/.../` regex literals will be lexed. + None, + /// A `/.../` regex literal will be lexed, but only if successful. + Tentative, + /// A `/.../` regex literal will always be lexed for a '/' character. + Always +}; + /// Kinds of conflict marker which the lexer might encounter. enum class ConflictMarkerKind { /// A normal or diff3 conflict marker, initiated by at least 7 "<"s, @@ -112,6 +122,10 @@ class Lexer { /// a .sil file. const LexerMode LexMode; + /// Whether or not a `/.../` literal will be lexed. + LexerForwardSlashRegexMode ForwardSlashRegexMode = + LexerForwardSlashRegexMode::None; + /// True if we should skip past a `#!` line at the start of the file. const bool IsHashbangAllowed; @@ -551,6 +565,11 @@ class Lexer { void operator=(const SILBodyRAII&) = delete; }; + /// Attempt to re-lex a regex literal with forward slashes `/.../` from a + /// given lexing state. If \p mustBeRegex is set to true, a regex literal will + /// always be lexed. Otherwise, it will not be lexed if it may be ambiguous. + void tryLexForwardSlashRegexLiteralFrom(State S, bool mustBeRegex); + private: /// Nul character meaning kind. enum class NulCharacterKind { @@ -615,8 +634,8 @@ class Lexer { void lexStringLiteral(unsigned CustomDelimiterLen = 0); void lexEscapedIdentifier(); - /// Attempt to lex a regex literal, returning true if a regex literal was - /// lexed, false if this is not a regex literal. 
+ /// Attempt to lex a regex literal, returning true if lexing should continue, + /// false if this is not a regex literal. bool tryLexRegexLiteral(const char *TokStart); void tryLexEditorPlaceholder(); diff --git a/include/swift/Parse/Parser.h b/include/swift/Parse/Parser.h index 061f1b4ba3eb5..d425d1b4f639e 100644 --- a/include/swift/Parse/Parser.h +++ b/include/swift/Parse/Parser.h @@ -559,6 +559,11 @@ class Parser { return f(backtrackScope); } + /// Discard the current token. This will avoid interface hashing or updating + /// the previous loc. Should only be used if you've completely re-lexed + /// a different token at that position. + SourceLoc discardToken(); + /// Consume a token that we created on the fly to correct the original token /// stream from lexer. void consumeExtraToken(Token K); @@ -1752,8 +1757,17 @@ class Parser { ParserResult parseExprPoundCodeCompletion(Optional ParentKind); + UnresolvedDeclRefExpr *makeExprOperator(const Token &opToken); UnresolvedDeclRefExpr *parseExprOperator(); + /// Try re-lex a '/' operator character as a regex literal. This should be + /// called when parsing in an expression position to ensure a regex literal is + /// correctly parsed. + /// + /// If \p mustBeRegex is set to true, a regex literal will always be lexed if + /// enabled. Otherwise, it will not be lexed if it may be ambiguous. + void tryLexRegexLiteral(bool mustBeRegex); + void validateCollectionElement(ParserResult element); //===--------------------------------------------------------------------===// diff --git a/lib/Parse/Lexer.cpp b/lib/Parse/Lexer.cpp index b2e7561d5ee7e..4b08f2776efec 100644 --- a/lib/Parse/Lexer.cpp +++ b/lib/Parse/Lexer.cpp @@ -1969,7 +1969,75 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) { if (!LangOpts.EnableExperimentalStringProcessing || !regexLiteralLexingFn) return false; - // Ask libswift to try and lex a regex literal. 
+ bool MustBeRegex = true; + bool IsForwardSlash = (*TokStart == '/'); + + // Check if we're able to lex a `/.../` regex. + if (IsForwardSlash) { + switch (ForwardSlashRegexMode) { + case LexerForwardSlashRegexMode::None: + return false; + case LexerForwardSlashRegexMode::Tentative: + MustBeRegex = false; + break; + case LexerForwardSlashRegexMode::Always: + break; + } + + // For `/.../` regex literals, we need to ban space and tab at the start of + // a regex to avoid ambiguity with operator chains, e.g: + // + // Builder { + // 0 + // / 1 / + // 2 + // } + // + // This takes advantage of the consistent operator spacing rule. We also + // need to ban ')' to avoid ambiguity with unapplied operator references e.g + // `reduce(1, /)`. This would be invalid regex syntax anyways. Note this + // doesn't totally save us from e.g `foo(/, 0)`, but it should at least + // help, and it ensures users can always surround their operator ref in + // parens `(/)` to fix the issue. + // TODO: This heuristic should be sunk into the Swift library once we have a + // way of doing fix-its from there. + auto *RegexContentStart = TokStart + 1; + switch (*RegexContentStart) { + case ')': { + if (!MustBeRegex) + return false; + + // ')' is invalid anyway, so we can let the parser diagnose it. + break; + } + case ' ': + case '\t': { + if (!MustBeRegex) + return false; + + // We must have a regex, so emit an error for space and tab. + StringRef DiagChar; + switch (*RegexContentStart) { + case ' ': + DiagChar = "space"; + break; + case '\t': + DiagChar = "tab"; + break; + default: + llvm_unreachable("Unhandled case"); + } + diagnose(RegexContentStart, diag::lex_regex_literal_invalid_starting_char, + DiagChar) + .fixItInsert(getSourceLoc(RegexContentStart), "\\"); + break; + } + default: + break; + } + } + + // Ask the Swift library to try and lex a regex literal. // - Ptr will not be advanced if this is not for a regex literal. // - ErrStr will be set if there is any error to emit. 
// - CompletelyErroneous will be set if there was an error that cannot be @@ -1977,14 +2045,33 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) { auto *Ptr = TokStart; const char *ErrStr = nullptr; bool CompletelyErroneous = regexLiteralLexingFn(&Ptr, BufferEnd, &ErrStr); - if (ErrStr) - diagnose(TokStart, diag::regex_literal_parsing_error, ErrStr); // If we didn't make any lexing progress, this isn't a regex literal and we // should fallback to lexing as something else. if (Ptr == TokStart) return false; + if (ErrStr) { + if (!MustBeRegex) + return false; + + diagnose(TokStart, diag::regex_literal_parsing_error, ErrStr); + } + + // If we're lexing `/.../`, error if we ended on the opening of a comment. + // We prefer to lex the comment as it's more likely than not that is what + // the user is expecting. + // TODO: This should be sunk into the Swift library. + if (IsForwardSlash && Ptr[-1] == '/' && (*Ptr == '*' || *Ptr == '/')) { + if (!MustBeRegex) + return false; + + diagnose(TokStart, diag::lex_regex_literal_unterminated); + + // Move the pointer back to the '/' of the comment. + Ptr--; + } + // Update to point to where we ended regex lexing. assert(Ptr > TokStart && Ptr <= BufferEnd); CurPtr = Ptr; @@ -1996,12 +2083,23 @@ bool Lexer::tryLexRegexLiteral(const char *TokStart) { return true; } - // Otherwise, we either had a successful lex, or something that was - // recoverable. + // We either had a successful lex, or something that was recoverable. formToken(tok::regex_literal, TokStart); return true; } +void Lexer::tryLexForwardSlashRegexLiteralFrom(State S, bool mustBeRegex) { + if (!LangOpts.EnableBareSlashRegexLiterals) + return; + + // Try re-lex with forward slash enabled. + llvm::SaveAndRestore RegexLexingScope( + ForwardSlashRegexMode, mustBeRegex + ? 
LexerForwardSlashRegexMode::Always + : LexerForwardSlashRegexMode::Tentative); + restoreState(S, /*enableDiagnostics*/ true); +} + /// lexEscapedIdentifier: /// identifier ::= '`' identifier '`' /// @@ -2483,8 +2581,7 @@ void Lexer::lexImpl() { if (unsigned CustomDelimiterLen = advanceIfCustomDelimiter(CurPtr, Diags)) return lexStringLiteral(CustomDelimiterLen); - // If we have experimental string processing enabled, try lex a regex - // literal. + // Try lex a regex literal. if (tryLexRegexLiteral(TokStart)) return; @@ -2505,6 +2602,10 @@ void Lexer::lexImpl() { "Non token comment should be eaten by lexTrivia as LeadingTrivia"); return formToken(tok::comment, TokStart); } + // Try lex a regex literal. + if (tryLexRegexLiteral(TokStart)) + return; + return lexOperatorIdentifier(); case '%': // Lex %[0-9a-zA-Z_]+ as a local SIL value diff --git a/lib/Parse/ParseExpr.cpp b/lib/Parse/ParseExpr.cpp index 27a2f9cb021bf..bf75b19430de0 100644 --- a/lib/Parse/ParseExpr.cpp +++ b/lib/Parse/ParseExpr.cpp @@ -511,6 +511,10 @@ ParserResult Parser::parseExprSequenceElement(Diag<> message, ParserResult Parser::parseExprUnary(Diag<> Message, bool isExprBasic) { SyntaxParsingContext UnaryContext(SyntaxContext, SyntaxContextKind::Expr); UnresolvedDeclRefExpr *Operator; + + // First check to see if we have the start of a regex literal `/.../`. + tryLexRegexLiteral(/*mustBeRegex*/ true); + switch (Tok.getKind()) { default: // If the next token is not an operator, just parse this as expr-postfix. @@ -532,16 +536,32 @@ ParserResult Parser::parseExprUnary(Diag<> Message, bool isExprBasic) { case tok::backslash: return parseExprKeyPath(); - case tok::oper_postfix: + case tok::oper_postfix: { // Postfix operators cannot start a subexpression, but can happen // syntactically because the operator may just follow whatever precedes this // expression (and that may not always be an expression). 
diagnose(Tok, diag::invalid_postfix_operator); Tok.setKind(tok::oper_prefix); - LLVM_FALLTHROUGH; - case tok::oper_prefix: Operator = parseExprOperator(); break; + } + case tok::oper_prefix: { + // Check to see if we can split a prefix operator containing `/`, e.g `!/`, + // which might be a prefix operator on a regex literal. + if (Context.LangOpts.EnableBareSlashRegexLiterals) { + auto slashIdx = Tok.getText().find("/"); + if (slashIdx != StringRef::npos) { + auto prefix = Tok.getText().take_front(slashIdx); + if (!prefix.empty()) { + Operator = makeExprOperator({Tok.getKind(), prefix}); + consumeStartingCharacterOfCurrentToken(Tok.getKind(), prefix.size()); + break; + } + } + } + Operator = parseExprOperator(); + break; + } case tok::oper_binary_spaced: case tok::oper_binary_unspaced: { // For recovery purposes, accept an oper_binary here. @@ -860,19 +880,52 @@ static DeclRefKind getDeclRefKindForOperator(tok kind) { } } -/// parseExprOperator - Parse an operator reference expression. These -/// are not "proper" expressions; they can only appear in binary/unary -/// operators. -UnresolvedDeclRefExpr *Parser::parseExprOperator() { +UnresolvedDeclRefExpr *Parser::makeExprOperator(const Token &Tok) { assert(Tok.isAnyOperator()); DeclRefKind refKind = getDeclRefKindForOperator(Tok.getKind()); SourceLoc loc = Tok.getLoc(); DeclNameRef name(Context.getIdentifier(Tok.getText())); - consumeToken(); // Bypass local lookup. return new (Context) UnresolvedDeclRefExpr(name, refKind, DeclNameLoc(loc)); } +/// parseExprOperator - Parse an operator reference expression. These +/// are not "proper" expressions; they can only appear in binary/unary +/// operators. +UnresolvedDeclRefExpr *Parser::parseExprOperator() { + auto *op = makeExprOperator(Tok); + consumeToken(); + return op; +} + +void Parser::tryLexRegexLiteral(bool mustBeRegex) { + if (!Context.LangOpts.EnableBareSlashRegexLiterals) + return; + + // Check to see if we have the start of a regex literal `/.../`. 
+ switch (Tok.getKind()) { + case tok::oper_prefix: + case tok::oper_binary_spaced: + case tok::oper_binary_unspaced: { + if (!Tok.getText().startswith("/")) + break; + + // Try re-lex as a `/.../` regex literal. + auto state = getParserPosition().LS; + L->tryLexForwardSlashRegexLiteralFrom(state, mustBeRegex); + + // Discard the current token, which will be replaced by the re-lexed token, + // which may or may not be a regex literal token. + discardToken(); + + assert(Tok.getText().startswith("/")); + break; + } + default: + break; + } +} + /// parseExprSuper /// /// expr-super: @@ -3160,6 +3213,11 @@ ParserStatus Parser::parseExprList(tok leftTok, tok rightTok, SourceLoc FieldNameLoc; parseOptionalArgumentLabel(FieldName, FieldNameLoc); + // First check to see if we have the start of a regex literal `/.../`. We + // need to do this before handling unapplied operator references, as e.g + // `(/, /)` might be a regex literal. + tryLexRegexLiteral(/*mustBeRegex*/ false); + // See if we have an operator decl ref '()'. The operator token in // this case lexes as a binary operator because it neither leads nor // follows a proper subexpression. 
diff --git a/lib/Parse/Parser.cpp b/lib/Parse/Parser.cpp index d7505c0d3824f..56229a73dd653 100644 --- a/lib/Parse/Parser.cpp +++ b/lib/Parse/Parser.cpp @@ -579,13 +579,16 @@ const Token &Parser::peekToken() { return L->peekNextToken(); } -SourceLoc Parser::consumeTokenWithoutFeedingReceiver() { - SourceLoc Loc = Tok.getLoc(); +SourceLoc Parser::discardToken() { assert(Tok.isNot(tok::eof) && "Lexing past eof!"); + SourceLoc Loc = Tok.getLoc(); + L->lex(Tok, LeadingTrivia, TrailingTrivia); + return Loc; +} +SourceLoc Parser::consumeTokenWithoutFeedingReceiver() { recordTokenHash(Tok); - - L->lex(Tok, LeadingTrivia, TrailingTrivia); + auto Loc = discardToken(); PreviousLoc = Loc; return Loc; } diff --git a/test/IDE/complete_regex.swift b/test/IDE/complete_regex.swift index 7ad3cc02d8ddd..7f2357840c94a 100644 --- a/test/IDE/complete_regex.swift +++ b/test/IDE/complete_regex.swift @@ -4,7 +4,7 @@ // RUN: %target-swift-ide-test -enable-bare-slash-regex -batch-code-completion -source-filename %s -filecheck %raw-FileCheck -completion-output-dir %t func testLiteral() { - #/foo/#.#^RE_LITERAL_MEMBER^# + /foo/.#^RE_LITERAL_MEMBER^# // RE_LITERAL_MEMBER: Begin completions // RE_LITERAL_MEMBER-DAG: Keyword[self]/CurrNominal: self[#Regex#]; // RE_LITERAL_MEMBER: End completions diff --git a/test/SourceKit/Sema/sema_regex.swift b/test/SourceKit/Sema/sema_regex.swift index bb1b421d8f3e1..67ba676978d9e 100644 --- a/test/SourceKit/Sema/sema_regex.swift +++ b/test/SourceKit/Sema/sema_regex.swift @@ -1,5 +1,5 @@ public func retRegex() -> Regex { - #/foo/# + /foo/ } // REQUIRES: swift_in_compiler diff --git a/test/StringProcessing/Parse/forward-slash-regex.swift b/test/StringProcessing/Parse/forward-slash-regex.swift index 5c41aae4c7be1..5f9c0964d860f 100644 --- a/test/StringProcessing/Parse/forward-slash-regex.swift +++ b/test/StringProcessing/Parse/forward-slash-regex.swift @@ -1,5 +1,283 @@ // RUN: %target-typecheck-verify-swift -enable-bare-slash-regex +// REQUIRES: 
swift_in_compiler +// REQUIRES: concurrency prefix operator / // expected-error {{prefix operator may not contain '/'}} prefix operator ^/ // expected-error {{prefix operator may not contain '/'}} prefix operator /^/ // expected-error {{prefix operator may not contain '/'}} + +precedencegroup P { + associativity: left +} + +// Fine. +infix operator /^/ : P +func /^/ (lhs: Int, rhs: Int) -> Int { 0 } + +let i = 0 /^/ 1/^/3 + +let x = /abc/ +_ = /abc/ +_ = /x/.self +_ = /\// +_ = /\\/ + +// These unfortunately become infix `=/`. We could likely improve the diagnostic +// though. +let z=/0/ +// expected-error@-1 {{type annotation missing in pattern}} +// expected-error@-2 {{consecutive statements on a line must be separated by ';'}} +// expected-error@-3 {{expected expression after unary operator}} +// expected-error@-4 {{cannot find operator '=/' in scope}} +// expected-error@-5 {{'/' is not a postfix unary operator}} +_=/0/ +// expected-error@-1 {{'_' can only appear in a pattern or on the left side of an assignment}} +// expected-error@-2 {{cannot find operator '=/' in scope}} +// expected-error@-3 {{'/' is not a postfix unary operator}} + +_ = /x +// expected-error@-1 {{unterminated regex literal}} + +_ = !/x/ +// expected-error@-1 {{cannot convert value of type 'Regex' to expected argument type 'Bool'}} + +_ = /x/! 
// expected-error {{cannot force unwrap value of non-optional type 'Regex'}} +_ = /x/ + /y/ // expected-error {{binary operator '+' cannot be applied to two 'Regex' operands}} + +_ = /x/+/y/ +// expected-error@-1 {{cannot find operator '+/' in scope}} +// expected-error@-2 {{'/' is not a postfix unary operator}} +// expected-error@-3 {{cannot find 'y' in scope}} + +_ = /x/?.blah +// expected-error@-1 {{cannot use optional chaining on non-optional value of type 'Regex'}} +// expected-error@-2 {{value of type 'Regex' has no member 'blah'}} +_ = /x/!.blah +// expected-error@-1 {{cannot force unwrap value of non-optional type 'Regex'}} +// expected-error@-2 {{value of type 'Regex' has no member 'blah'}} + +_ = /x /? // expected-error {{cannot use optional chaining on non-optional value of type 'Regex'}} + .blah // expected-error {{value of type 'Regex' has no member 'blah'}} + +_ = 0; /x / // expected-warning {{regular expression literal is unused}} + +_ = /x / ? 0 : 1 // expected-error {{cannot convert value of type 'Regex' to expected condition type 'Bool'}} +_ = .random() ? /x / : .blah // expected-error {{type 'Regex' has no member 'blah'}} + +_ = /x/ ?? /x/ // expected-warning {{left side of nil coalescing operator '??' has non-optional type 'Regex', so the right side is never used}} +_ = /x / ?? /x / // expected-warning {{left side of nil coalescing operator '??' has non-optional type 'Regex', so the right side is never used}} + +_ = /x/??/x/ // expected-error {{'/' is not a postfix unary operator}} + +_ = /x/ ... /y/ // expected-error {{referencing operator function '...' on 'Comparable' requires that 'Regex' conform to 'Comparable'}} + +_ = /x/.../y/ +// expected-error@-1 {{missing whitespace between '...' and '/' operators}} +// expected-error@-2 {{'/' is not a postfix unary operator}} +// expected-error@-3 {{cannot find 'y' in scope}} + +_ = /x /... +// expected-error@-1 {{unary operator '...' 
cannot be applied to an operand of type 'Regex'}} +// expected-note@-2 {{overloads for '...' exist with these partially matching parameter lists}} + +do { + _ = true / false /; // expected-error {{expected expression after operator}} +} + +_ = "\(/x/)" + +func defaulted(x: Regex = /x/) {} + +func foo(_ x: T, y: T) {} +foo(/abc/, y: /abc /) + +func bar(_ x: inout T) {} + +// TODO: We split this into a prefix '&', but inout is handled specially when +// parsing an argument list. This shouldn't matter anyway, but we should at +// least have a custom diagnostic. +bar(&/x/) +// expected-error@-1 {{'&' is not a prefix unary operator}} + +struct S { + subscript(x: Regex) -> Void { () } +} + +func testSubscript(_ x: S) { + x[/x/] + x[/x /] +} + +func testReturn() -> Regex { + if .random() { + return /x/ + } + return /x / +} + +func testThrow() throws { + throw /x / // expected-error {{thrown expression type 'Regex' does not conform to 'Error'}} +} + +_ = [/abc/, /abc /] +_ = [/abc/:/abc/] // expected-error {{generic struct 'Dictionary' requires that 'Regex' conform to 'Hashable'}} +_ = [/abc/ : /abc/] // expected-error {{generic struct 'Dictionary' requires that 'Regex' conform to 'Hashable'}} +_ = [/abc /:/abc /] // expected-error {{generic struct 'Dictionary' requires that 'Regex' conform to 'Hashable'}} +_ = [/abc /: /abc /] // expected-error {{generic struct 'Dictionary' requires that 'Regex' conform to 'Hashable'}} +_ = (/abc/, /abc /) +_ = ((/abc /)) + +_ = { /abc/ } +_ = { + /abc/ +} + +let _: () -> Int = { + 0 + / 1 / + 2 +} + +_ = { + 0 // expected-warning {{integer literal is unused}} + /1 / // expected-warning {{regular expression literal is unused}} + 2 // expected-warning {{integer literal is unused}} +} + +// Operator chain, as a regex literal may not start with space. 
+_ = 2 +/ 1 / .bitWidth + +_ = 2 +/1/ .bitWidth // expected-error {{value of type 'Regex' has no member 'bitWidth'}} + +_ = 2 +/ 1 / + .bitWidth + +_ = 2 +/1 / + .bitWidth // expected-error {{value of type 'Regex' has no member 'bitWidth'}} + +let z = +/y/ + +// While '.' is technically an operator character, it seems more likely that +// the user hasn't written the member name yet. +_ = 0. / 1 / 2 // expected-error {{expected member name following '.'}} +_ = 0 . / 1 / 2 // expected-error {{expected member name following '.'}} + +switch "" { +case /x/: + // expected-error@-1 {{expression pattern of type 'Regex' cannot match values of type 'String'}} + // expected-note@-2 {{overloads for '~=' exist with these partially matching parameter lists: (Substring, String)}} + break +case _ where /x /: + // expected-error@-1 {{cannot convert value of type 'Regex' to expected condition type 'Bool'}} + break +default: + break +} + +do {} catch /x / {} +// expected-error@-1 {{expression pattern of type 'Regex' cannot match values of type 'any Error'}} +// expected-error@-2 {{binary operator '~=' cannot be applied to two 'any Error' operands}} +// expected-warning@-3 {{'catch' block is unreachable because no errors are thrown in 'do' block}} + +switch /x / { +default: + break +} + +if /x / {} // expected-error {{cannot convert value of type 'Regex' to expected condition type 'Bool'}} +if /x /.smth {} // expected-error {{value of type 'Regex' has no member 'smth'}} + +func testGuard() { + guard /x/ else { return } // expected-error {{cannot convert value of type 'Regex' to expected condition type 'Bool'}} +} + +for x in [0] where /x/ {} // expected-error {{cannot convert value of type 'Regex' to expected condition type 'Bool'}} + +typealias Magic = T +_ = /x/ as Magic +_ = /x/ as! 
String // expected-warning {{cast from 'Regex' to unrelated type 'String' always fails}} + +_ = type(of: /x /) + +do { + let /x / // expected-error {{expected pattern}} +} + +_ = try /x/; _ = try /x / +// expected-warning@-1 2{{no calls to throwing functions occur within 'try' expression}} +_ = try? /x/; _ = try? /x / +// expected-warning@-1 2{{no calls to throwing functions occur within 'try' expression}} +_ = try! /x/; _ = try! /x / +// expected-warning@-1 2{{no calls to throwing functions occur within 'try' expression}} + +_ = await /x / // expected-warning {{no 'async' operations occur within 'await' expression}} + +/x/ = 0 // expected-error {{cannot assign to value: literals are not mutable}} +/x/() // expected-error {{cannot call value of non-function type 'Regex'}} + +// We treat the following as comments, as it seems more likely the user has +// written a comment and is still in the middle of writing the characters before +// it. +_ = /x// comment +// expected-error@-1 {{unterminated regex literal}} + +_ = /x // comment +// expected-error@-1 {{unterminated regex literal}} + +_ = /x/*comment*/ +// expected-error@-1 {{unterminated regex literal}} + +// These become regex literals, unless surrounded in parens. 
+func baz(_ x: (Int, Int) -> Int, _ y: (Int, Int) -> Int) {} // expected-note 2{{'baz' declared here}} +baz(/, /) +// expected-error@-1 {{cannot convert value of type 'Regex' to expected argument type '(Int, Int) -> Int'}} +// expected-error@-2 {{missing argument for parameter #2 in call}} +baz(/,/) +// expected-error@-1 {{cannot convert value of type 'Regex' to expected argument type '(Int, Int) -> Int'}} +// expected-error@-2 {{missing argument for parameter #2 in call}} +baz((/), /) + +func qux(_ x: (Int, Int) -> Int, _ y: T) -> Int { 0 } +do { + _ = qux(/, 1) / 2 + // expected-error@-1 {{cannot parse regular expression: closing ')' does not balance any groups openings}} + // expected-error@-2 {{expected ',' separator}} +} +do { + _ = qux(/, "(") / 2 + // expected-error@-1 {{cannot convert value of type 'Regex<(Substring, Substring)>' to expected argument type '(Int, Int) -> Int'}} + // expected-error@-2 {{expected ',' separator}} +} +_ = qux(/, 1) // this comment tests to make sure we don't try and end the regex on the starting '/' of '//'. + +let arr: [Double] = [2, 3, 4] +_ = arr.reduce(1, /) / 3 +_ = arr.reduce(1, /) + arr.reduce(1, /) + +// Fine. +_ = /./ + +// You need to escape if you want a regex literal to start with these characters. +_ = /\ / +_ = / / // expected-error {{regex literal may not start with space; add backslash to escape}} {{6-6=\}} + +_ = /\)/ +_ = /)/ // expected-error {{closing ')' does not balance any groups openings}} + +_ = /,/ +_ = /}/ +_ = /]/ +_ = /:/ +_ = /;/ + +// Don't emit diagnostics here, as we re-lex. 
+_ = /0xG/ +_ = /0oG/ +_ = /"/ +_ = /'/ +_ = /<#placeholder#>/ diff --git a/test/StringProcessing/Parse/regex.swift b/test/StringProcessing/Parse/regex.swift index b6fb31d647f15..7bb006cf37564 100644 --- a/test/StringProcessing/Parse/regex.swift +++ b/test/StringProcessing/Parse/regex.swift @@ -1,17 +1,20 @@ // RUN: %target-typecheck-verify-swift -enable-bare-slash-regex // REQUIRES: swift_in_compiler +_ = /abc/ _ = #/abc/# _ = ##/abc/## func foo(_ x: T...) {} -foo(#/abc/#, ##/abc/##) +foo(/abc/, #/abc/#, ##/abc/##) -let arr = [#/abc/#, ##/abc/##] +let arr = [/abc/, #/abc/#, ##/abc/##] +_ = /\w+/.self _ = #/\w+/#.self _ = ##/\w+/##.self +_ = /#\/\#\\/ _ = #/#/\/\#\\/# _ = ##/#|\|\#\\/## diff --git a/test/StringProcessing/Parse/regex_parse_error.swift b/test/StringProcessing/Parse/regex_parse_error.swift index f91e8cb5013cd..2dcd918b707eb 100644 --- a/test/StringProcessing/Parse/regex_parse_error.swift +++ b/test/StringProcessing/Parse/regex_parse_error.swift @@ -1,9 +1,11 @@ // RUN: %target-typecheck-verify-swift -enable-bare-slash-regex // REQUIRES: swift_in_compiler +_ = /(/ // expected-error {{expected ')'}} _ = #/(/# // expected-error {{expected ')'}} // FIXME: Should be 'group openings' +_ = /)/ // expected-error {{closing ')' does not balance any groups openings}} _ = #/)/# // expected-error {{closing ')' does not balance any groups openings}} _ = #/\\/''/ // expected-error {{unterminated regex literal}} @@ -25,11 +27,21 @@ do { _ = #/\(?'abc/# -_ = #/\ -/# -// expected-error@-2 {{unterminated regex literal}} -// expected-error@-3 {{expected escape sequence}} -// expected-error@-3 {{expected expression}} +do { + _ = /\ + / + // expected-error@-2 {{unterminated regex literal}} + // expected-error@-3 {{expected escape sequence}} +} // expected-error {{expected expression after operator}} + +do { + _ = #/\ + /# + // expected-error@-2 {{unterminated regex literal}} + // expected-error@-3 {{expected escape sequence}} + // expected-error@-3 {{unterminated regex 
literal}} + // expected-warning@-4 {{regular expression literal is unused}} +} func foo(_ x: T, _ y: T) {} foo(#/(?/#, #/abc/#) // expected-error {{expected group specifier}}