diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index ac0ef14c591bd..899e665e74546 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -575,19 +575,23 @@ class Lexer : public PreprocessorLexer { /// sequence. static bool isNewLineEscaped(const char *BufferStart, const char *Str); + /// Represents a char and the number of bytes parsed to produce it. + struct SizedChar { + char Char; + unsigned Size; + }; + /// getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever /// emit a warning. - static inline char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, - const LangOptions &LangOpts) { + static inline SizedChar getCharAndSizeNoWarn(const char *Ptr, + const LangOptions &LangOpts) { // If this is not a trigraph and not a UCN or escaped newline, return // quickly. if (isObviouslySimpleCharacter(Ptr[0])) { - Size = 1; - return *Ptr; + return {*Ptr, 1u}; } - Size = 0; - return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts); + return getCharAndSizeSlowNoWarn(Ptr, LangOpts); } /// Returns the leading whitespace for line that corresponds to the given @@ -665,8 +669,7 @@ class Lexer : public PreprocessorLexer { // quickly. if (isObviouslySimpleCharacter(Ptr[0])) return *Ptr++; - unsigned Size = 0; - char C = getCharAndSizeSlow(Ptr, Size, &Tok); + auto [C, Size] = getCharAndSizeSlow(Ptr, &Tok); Ptr += Size; return C; } @@ -682,9 +685,7 @@ class Lexer : public PreprocessorLexer { // Otherwise, re-lex the character with a current token, allowing // diagnostics to be emitted and flags to be set. - Size = 0; - getCharAndSizeSlow(Ptr, Size, &Tok); - return Ptr+Size; + return Ptr + getCharAndSizeSlow(Ptr, &Tok).Size; } /// getCharAndSize - Peek a single 'character' from the specified buffer, @@ -699,14 +700,14 @@ class Lexer : public PreprocessorLexer { return *Ptr; } - Size = 0; - return getCharAndSizeSlow(Ptr, Size); + auto CharAndSize = getCharAndSizeSlow(Ptr); + Size = CharAndSize.Size; + return CharAndSize.Char; } /// getCharAndSizeSlow - Handle the slow/uncommon case of the getCharAndSize /// method. - char getCharAndSizeSlow(const char *Ptr, unsigned &Size, - Token *Tok = nullptr); + SizedChar getCharAndSizeSlow(const char *Ptr, Token *Tok = nullptr); /// getEscapedNewLineSize - Return the size of the specified escaped newline, /// or 0 if it is not an escaped newline. P[-1] is known to be a "\" on entry @@ -720,8 +721,8 @@ class Lexer : public PreprocessorLexer { /// getCharAndSizeSlowNoWarn - Same as getCharAndSizeSlow, but never emits a /// diagnostic. - static char getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size, - const LangOptions &LangOpts); + static SizedChar getCharAndSizeSlowNoWarn(const char *Ptr, + const LangOptions &LangOpts); //===--------------------------------------------------------------------===// // Other lexer functions. diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp index 2bd2c5f8388c0..980f865cf24c9 100644 --- a/clang/lib/Lex/DependencyDirectivesScanner.cpp +++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp @@ -565,9 +565,8 @@ Scanner::cleanStringIfNeeded(const dependency_directives_scan::Token &Tok) { const char *BufPtr = Input.begin() + Tok.Offset; const char *AfterIdent = Input.begin() + Tok.getEnd(); while (BufPtr < AfterIdent) { - unsigned Size; - Spelling[SpellingLength++] = - Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts); + auto [Char, Size] = Lexer::getCharAndSizeNoWarn(BufPtr, LangOpts); + Spelling[SpellingLength++] = Char; BufPtr += Size; } diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 675ec28e51479..1c53997527732 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -287,9 +287,9 @@ static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, if (tok::isStringLiteral(Tok.getKind())) { // Munch the encoding-prefix and opening double-quote. while (BufPtr < BufEnd) { - unsigned Size; - Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts); - BufPtr += Size; + auto CharAndSize = Lexer::getCharAndSizeNoWarn(BufPtr, LangOpts); + Spelling[Length++] = CharAndSize.Char; + BufPtr += CharAndSize.Size; if (Spelling[Length - 1] == '"') break; @@ -316,9 +316,9 @@ static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, } while (BufPtr < BufEnd) { - unsigned Size; - Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts); - BufPtr += Size; + auto CharAndSize = Lexer::getCharAndSizeNoWarn(BufPtr, LangOpts); + Spelling[Length++] = CharAndSize.Char; + BufPtr += CharAndSize.Size; } assert(Length < Tok.getLength() && @@ -772,10 +772,9 @@ unsigned Lexer::getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, // If we have a character that may be a trigraph or escaped newline, use a // lexer to parse it correctly. for (; CharNo; --CharNo) { - unsigned Size; - Lexer::getCharAndSizeNoWarn(TokPtr, Size, LangOpts); - TokPtr += Size; - PhysOffset += Size; + auto CharAndSize = Lexer::getCharAndSizeNoWarn(TokPtr, LangOpts); + TokPtr += CharAndSize.Size; + PhysOffset += CharAndSize.Size; } // Final detail: if we end up on an escaped newline, we want to return the @@ -1357,15 +1356,16 @@ SourceLocation Lexer::findLocationAfterToken( /// /// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should /// be updated to match. -char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size, - Token *Tok) { +Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) { + unsigned Size = 0; // If we have a slash, look for an escaped newline. if (Ptr[0] == '\\') { ++Size; ++Ptr; Slash: // Common case, backslash-char where the char is not whitespace. - if (!isWhitespace(Ptr[0])) return '\\'; + if (!isWhitespace(Ptr[0])) + return {'\\', Size}; // See if we have optional whitespace characters between the slash and // newline. @@ -1382,11 +1382,13 @@ char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size, Ptr += EscapedNewLineSize; // Use slow version to accumulate a correct size field. - return getCharAndSizeSlow(Ptr, Size, Tok); + auto CharAndSize = getCharAndSizeSlow(Ptr, Tok); + CharAndSize.Size += Size; + return CharAndSize; } // Otherwise, this is not an escaped newline, just return the slash. - return '\\'; + return {'\\', Size}; } // If this is a trigraph, process it. @@ -1401,13 +1403,12 @@ char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size, Ptr += 3; Size += 3; if (C == '\\') goto Slash; - return C; + return {C, Size}; } } // If this is neither, return a single character. - ++Size; - return *Ptr; + return {*Ptr, Size + 1u}; } /// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the @@ -1416,15 +1417,18 @@ char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size, /// /// NOTE: When this method is updated, getCharAndSizeSlow (above) should /// be updated to match. -char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size, - const LangOptions &LangOpts) { +Lexer::SizedChar Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, + const LangOptions &LangOpts) { + + unsigned Size = 0; // If we have a slash, look for an escaped newline. if (Ptr[0] == '\\') { ++Size; ++Ptr; Slash: // Common case, backslash-char where the char is not whitespace. - if (!isWhitespace(Ptr[0])) return '\\'; + if (!isWhitespace(Ptr[0])) + return {'\\', Size}; // See if we have optional whitespace characters followed by a newline. if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) { @@ -1433,11 +1437,13 @@ char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size, Ptr += EscapedNewLineSize; // Use slow version to accumulate a correct size field. - return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts); + auto CharAndSize = getCharAndSizeSlowNoWarn(Ptr, LangOpts); + CharAndSize.Size += Size; + return CharAndSize; } // Otherwise, this is not an escaped newline, just return the slash. - return '\\'; + return {'\\', Size}; } // If this is a trigraph, process it. @@ -1448,13 +1454,12 @@ char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size, Ptr += 3; Size += 3; if (C == '\\') goto Slash; - return C; + return {C, Size}; } } // If this is neither, return a single character. - ++Size; - return *Ptr; + return {*Ptr, Size + 1u}; } //===----------------------------------------------------------------------===// @@ -1964,11 +1969,14 @@ bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) { /// isHexaLiteral - Return true if Start points to a hex constant. /// in microsoft mode (where this is supposed to be several different tokens). bool Lexer::isHexaLiteral(const char *Start, const LangOptions &LangOpts) { - unsigned Size; - char C1 = Lexer::getCharAndSizeNoWarn(Start, Size, LangOpts); + auto CharAndSize1 = Lexer::getCharAndSizeNoWarn(Start, LangOpts); + char C1 = CharAndSize1.Char; if (C1 != '0') return false; - char C2 = Lexer::getCharAndSizeNoWarn(Start + Size, Size, LangOpts); + + auto CharAndSize2 = + Lexer::getCharAndSizeNoWarn(Start + CharAndSize1.Size, LangOpts); + char C2 = CharAndSize2.Char; return (C2 == 'x' || C2 == 'X'); } @@ -2012,8 +2020,7 @@ bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { // If we have a digit separator, continue. if (C == '\'' && (LangOpts.CPlusPlus14 || LangOpts.C23)) { - unsigned NextSize; - char Next = getCharAndSizeNoWarn(CurPtr + Size, NextSize, LangOpts); + auto [Next, NextSize] = getCharAndSizeNoWarn(CurPtr + Size, LangOpts); if (isAsciiIdentifierContinue(Next)) { if (!isLexingRawMode()) Diag(CurPtr, LangOpts.CPlusPlus @@ -2085,8 +2092,8 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr, unsigned Consumed = Size; unsigned Chars = 1; while (true) { - unsigned NextSize; - char Next = getCharAndSizeNoWarn(CurPtr + Consumed, NextSize, LangOpts); + auto [Next, NextSize] = + getCharAndSizeNoWarn(CurPtr + Consumed, LangOpts); if (!isAsciiIdentifierContinue(Next)) { // End of suffix. Check whether this is on the allowed list. const StringRef CompleteSuffix(Buffer, Chars);