Skip to content

Commit

Permalink
Revert "Perf/lexer faster slow get char and size (#70543)"
Browse files Browse the repository at this point in the history
This reverts commit d8f5a18.
Breaks build, see:
#70543 (comment)
  • Loading branch information
nico committed Oct 30, 2023
1 parent 072a7ed commit 1c876ff
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 60 deletions.
35 changes: 17 additions & 18 deletions clang/include/clang/Lex/Lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -575,23 +575,19 @@ class Lexer : public PreprocessorLexer {
/// sequence.
static bool isNewLineEscaped(const char *BufferStart, const char *Str);

/// Represents a char and the number of bytes parsed to produce it.
struct SizedChar {
char Char;
unsigned Size;
};

/// getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever
/// emit a warning.
static inline SizedChar getCharAndSizeNoWarn(const char *Ptr,
const LangOptions &LangOpts) {
static inline char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size,
const LangOptions &LangOpts) {
// If this is not a trigraph and not a UCN or escaped newline, return
// quickly.
if (isObviouslySimpleCharacter(Ptr[0])) {
return {*Ptr, 1u};
Size = 1;
return *Ptr;
}

return getCharAndSizeSlowNoWarn(Ptr, LangOpts);
Size = 0;
return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
}

/// Returns the leading whitespace for line that corresponds to the given
Expand Down Expand Up @@ -669,7 +665,8 @@ class Lexer : public PreprocessorLexer {
// quickly.
if (isObviouslySimpleCharacter(Ptr[0])) return *Ptr++;

auto [C, Size] = getCharAndSizeSlow(Ptr, &Tok);
unsigned Size = 0;
char C = getCharAndSizeSlow(Ptr, Size, &Tok);
Ptr += Size;
return C;
}
Expand All @@ -685,7 +682,9 @@ class Lexer : public PreprocessorLexer {

// Otherwise, re-lex the character with a current token, allowing
// diagnostics to be emitted and flags to be set.
return Ptr + getCharAndSizeSlow(Ptr, &Tok).Size;
Size = 0;
getCharAndSizeSlow(Ptr, Size, &Tok);
return Ptr+Size;
}

/// getCharAndSize - Peek a single 'character' from the specified buffer,
Expand All @@ -700,14 +699,14 @@ class Lexer : public PreprocessorLexer {
return *Ptr;
}

auto CharAndSize = getCharAndSizeSlow(Ptr);
Size = CharAndSize.Size;
return CharAndSize.Char;
Size = 0;
return getCharAndSizeSlow(Ptr, Size);
}

/// getCharAndSizeSlow - Handle the slow/uncommon case of the getCharAndSize
/// method.
SizedChar getCharAndSizeSlow(const char *Ptr, Token *Tok = nullptr);
char getCharAndSizeSlow(const char *Ptr, unsigned &Size,
Token *Tok = nullptr);

/// getEscapedNewLineSize - Return the size of the specified escaped newline,
/// or 0 if it is not an escaped newline. P[-1] is known to be a "\" on entry
Expand All @@ -721,8 +720,8 @@ class Lexer : public PreprocessorLexer {

/// getCharAndSizeSlowNoWarn - Same as getCharAndSizeSlow, but never emits a
/// diagnostic.
static SizedChar getCharAndSizeSlowNoWarn(const char *Ptr,
const LangOptions &LangOpts);
static char getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
const LangOptions &LangOpts);

//===--------------------------------------------------------------------===//
// Other lexer functions.
Expand Down
5 changes: 3 additions & 2 deletions clang/lib/Lex/DependencyDirectivesScanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -565,8 +565,9 @@ Scanner::cleanStringIfNeeded(const dependency_directives_scan::Token &Tok) {
const char *BufPtr = Input.begin() + Tok.Offset;
const char *AfterIdent = Input.begin() + Tok.getEnd();
while (BufPtr < AfterIdent) {
auto [Char, Size] = Lexer::getCharAndSizeNoWarn(BufPtr, LangOpts);
Spelling[SpellingLength++] = Char;
unsigned Size;
Spelling[SpellingLength++] =
Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
BufPtr += Size;
}

Expand Down
73 changes: 33 additions & 40 deletions clang/lib/Lex/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,9 +287,9 @@ static size_t getSpellingSlow(const Token &Tok, const char *BufPtr,
if (tok::isStringLiteral(Tok.getKind())) {
// Munch the encoding-prefix and opening double-quote.
while (BufPtr < BufEnd) {
auto CharAndSize = Lexer::getCharAndSizeNoWarn(BufPtr, LangOpts);
Spelling[Length++] = CharAndSize.Char;
BufPtr += CharAndSize.Size;
unsigned Size;
Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
BufPtr += Size;

if (Spelling[Length - 1] == '"')
break;
Expand All @@ -316,9 +316,9 @@ static size_t getSpellingSlow(const Token &Tok, const char *BufPtr,
}

while (BufPtr < BufEnd) {
auto CharAndSize = Lexer::getCharAndSizeNoWarn(BufPtr, LangOpts);
Spelling[Length++] = CharAndSize.Char;
BufPtr += CharAndSize.Size;
unsigned Size;
Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
BufPtr += Size;
}

assert(Length < Tok.getLength() &&
Expand Down Expand Up @@ -772,9 +772,10 @@ unsigned Lexer::getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo,
// If we have a character that may be a trigraph or escaped newline, use a
// lexer to parse it correctly.
for (; CharNo; --CharNo) {
auto CharAndSize = Lexer::getCharAndSizeNoWarn(TokPtr, LangOpts);
TokPtr += CharAndSize.Size;
PhysOffset += CharAndSize.Size;
unsigned Size;
Lexer::getCharAndSizeNoWarn(TokPtr, Size, LangOpts);
TokPtr += Size;
PhysOffset += Size;
}

// Final detail: if we end up on an escaped newline, we want to return the
Expand Down Expand Up @@ -1356,16 +1357,15 @@ SourceLocation Lexer::findLocationAfterToken(
///
/// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should
/// be updated to match.
Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) {
unsigned Size = 0;
char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size,
Token *Tok) {
// If we have a slash, look for an escaped newline.
if (Ptr[0] == '\\') {
++Size;
++Ptr;
Slash:
// Common case, backslash-char where the char is not whitespace.
if (!isWhitespace(Ptr[0]))
return {'\\', Size};
if (!isWhitespace(Ptr[0])) return '\\';

// See if we have optional whitespace characters between the slash and
// newline.
Expand All @@ -1382,13 +1382,11 @@ Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) {
Ptr += EscapedNewLineSize;

// Use slow version to accumulate a correct size field.
auto CharAndSize = getCharAndSizeSlow(Ptr, Tok);
CharAndSize.Size += Size;
return CharAndSize;
return getCharAndSizeSlow(Ptr, Size, Tok);
}

// Otherwise, this is not an escaped newline, just return the slash.
return {'\\', Size};
return '\\';
}

// If this is a trigraph, process it.
Expand All @@ -1403,12 +1401,13 @@ Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) {
Ptr += 3;
Size += 3;
if (C == '\\') goto Slash;
return {C, Size};
return C;
}
}

// If this is neither, return a single character.
return {*Ptr, Size + 1u};
++Size;
return *Ptr;
}

/// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the
Expand All @@ -1417,18 +1416,15 @@ Lexer::SizedChar Lexer::getCharAndSizeSlow(const char *Ptr, Token *Tok) {
///
/// NOTE: When this method is updated, getCharAndSizeSlow (above) should
/// be updated to match.
Lexer::SizedChar Lexer::getCharAndSizeSlowNoWarn(const char *Ptr,
const LangOptions &LangOpts) {

unsigned Size = 0;
char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
const LangOptions &LangOpts) {
// If we have a slash, look for an escaped newline.
if (Ptr[0] == '\\') {
++Size;
++Ptr;
Slash:
// Common case, backslash-char where the char is not whitespace.
if (!isWhitespace(Ptr[0]))
return {'\\', Size};
if (!isWhitespace(Ptr[0])) return '\\';

// See if we have optional whitespace characters followed by a newline.
if (unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
Expand All @@ -1437,13 +1433,11 @@ Lexer::SizedChar Lexer::getCharAndSizeSlowNoWarn(const char *Ptr,
Ptr += EscapedNewLineSize;

// Use slow version to accumulate a correct size field.
auto CharAndSize = getCharAndSizeSlowNoWarn(Ptr, LangOpts);
CharAndSize.Size += Size;
return CharAndSize;
return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
}

// Otherwise, this is not an escaped newline, just return the slash.
return {'\\', Size};
return '\\';
}

// If this is a trigraph, process it.
Expand All @@ -1454,12 +1448,13 @@ Lexer::SizedChar Lexer::getCharAndSizeSlowNoWarn(const char *Ptr,
Ptr += 3;
Size += 3;
if (C == '\\') goto Slash;
return {C, Size};
return C;
}
}

// If this is neither, return a single character.
return {*Ptr, Size + 1u};
++Size;
return *Ptr;
}

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -1969,14 +1964,11 @@ bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) {
/// isHexaLiteral - Return true if Start points to a hex constant
/// in Microsoft mode (where this is supposed to be several different tokens).
bool Lexer::isHexaLiteral(const char *Start, const LangOptions &LangOpts) {
auto CharAndSize1 = Lexer::getCharAndSizeNoWarn(Start, LangOpts);
char C1 = CharAndSize1.Char;
unsigned Size;
char C1 = Lexer::getCharAndSizeNoWarn(Start, Size, LangOpts);
if (C1 != '0')
return false;

auto CharAndSize2 =
Lexer::getCharAndSizeNoWarn(Start + CharAndSize1.Size, LangOpts);
char C2 = CharAndSize2.Char;
char C2 = Lexer::getCharAndSizeNoWarn(Start + Size, Size, LangOpts);
return (C2 == 'x' || C2 == 'X');
}

Expand Down Expand Up @@ -2020,7 +2012,8 @@ bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {

// If we have a digit separator, continue.
if (C == '\'' && (LangOpts.CPlusPlus14 || LangOpts.C23)) {
auto [Next, NextSize] = getCharAndSizeNoWarn(CurPtr + Size, LangOpts);
unsigned NextSize;
char Next = getCharAndSizeNoWarn(CurPtr + Size, NextSize, LangOpts);
if (isAsciiIdentifierContinue(Next)) {
if (!isLexingRawMode())
Diag(CurPtr, LangOpts.CPlusPlus
Expand Down Expand Up @@ -2092,8 +2085,8 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr,
unsigned Consumed = Size;
unsigned Chars = 1;
while (true) {
auto [Next, NextSize] =
getCharAndSizeNoWarn(CurPtr + Consumed, LangOpts);
unsigned NextSize;
char Next = getCharAndSizeNoWarn(CurPtr + Consumed, NextSize, LangOpts);
if (!isAsciiIdentifierContinue(Next)) {
// End of suffix. Check whether this is on the allowed list.
const StringRef CompleteSuffix(Buffer, Chars);
Expand Down

0 comments on commit 1c876ff

Please sign in to comment.