Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[clang-format] Update FormatToken::isSimpleTypeSpecifier() #79115

Closed
wants to merge 2 commits into from

Conversation

owenca
Copy link
Contributor

@owenca owenca commented Jan 23, 2024

Depends on #80101.

@llvmbot llvmbot added clang Clang issues not falling into any other category clang-format clang:frontend Language frontend issues, e.g. anything involving "Sema" labels Jan 23, 2024
@llvmbot
Copy link
Collaborator

llvmbot commented Jan 23, 2024

@llvm/pr-subscribers-clang-format

@llvm/pr-subscribers-clang

Author: Owen Pan (owenca)

Changes

Depends on #79037.


Full diff: https://github.com/llvm/llvm-project/pull/79115.diff

10 Files Affected:

  • (modified) clang/include/clang/Basic/IdentifierTable.h (+4)
  • (modified) clang/include/clang/Format/Format.h (+2)
  • (modified) clang/include/clang/Sema/Sema.h (+1-1)
  • (modified) clang/lib/Basic/IdentifierTable.cpp (+39)
  • (modified) clang/lib/Format/FormatToken.cpp (+2-34)
  • (modified) clang/lib/Format/FormatTokenLexer.cpp (+4-3)
  • (modified) clang/lib/Format/FormatTokenLexer.h (-1)
  • (modified) clang/lib/Parse/ParseExpr.cpp (+2-1)
  • (modified) clang/lib/Parse/ParseObjc.cpp (+2-1)
  • (modified) clang/lib/Sema/SemaDecl.cpp (+2-46)
diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h
index 1ac182d4fce26f..2c979e438e81bb 100644
--- a/clang/include/clang/Basic/IdentifierTable.h
+++ b/clang/include/clang/Basic/IdentifierTable.h
@@ -427,6 +427,10 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
   /// language.
   bool isCPlusPlusKeyword(const LangOptions &LangOpts) const;
 
+  /// Return true if this token is a simple type specifier
+  /// in the specified language.
+  bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const;
+
   /// Get and set FETokenInfo. The language front-end is allowed to associate
   /// arbitrary metadata with this token.
   void *getFETokenInfo() const { return FETokenInfo; }
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index bc9eecd42f9ebf..2800f6db4a9786 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -5160,6 +5160,8 @@ tooling::Replacements sortUsingDeclarations(const FormatStyle &Style,
                                             ArrayRef<tooling::Range> Ranges,
                                             StringRef FileName = "<stdin>");
 
+extern LangOptions LangOpts;
+
 /// Returns the ``LangOpts`` that the formatter expects you to set.
 ///
 /// \param Style determines specific settings for lexing mode.
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 0db39333b0ee34..bc1fd19b5c6de7 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -2636,7 +2636,7 @@ class Sema final {
 
   void DiagnoseUseOfUnimplementedSelectors();
 
-  bool isSimpleTypeSpecifier(tok::TokenKind Kind) const;
+  bool isSimpleTypeSpecifier(const IdentifierInfo &II) const;
 
   ParsedType getTypeName(const IdentifierInfo &II, SourceLocation NameLoc,
                          Scope *S, CXXScopeSpec *SS = nullptr,
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index d0d8316385b452..78c783cdff6b5e 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -419,6 +419,45 @@ StringRef IdentifierInfo::deuglifiedName() const {
   return Name;
 }
 
+/// Determine whether the token kind starts a simple-type-specifier.
+bool IdentifierInfo::isSimpleTypeSpecifier(const LangOptions &LangOpts) const {
+  auto Kind = getTokenID();
+
+  switch (Kind) {
+  case tok::kw_short:
+  case tok::kw_long:
+  case tok::kw___int64:
+  case tok::kw___int128:
+  case tok::kw_signed:
+  case tok::kw_unsigned:
+  case tok::kw_void:
+  case tok::kw_char:
+  case tok::kw_int:
+  case tok::kw_half:
+  case tok::kw_float:
+  case tok::kw_double:
+  case tok::kw___bf16:
+  case tok::kw__Float16:
+  case tok::kw___float128:
+  case tok::kw_wchar_t:
+  case tok::kw_bool:
+  case tok::kw___underlying_type:
+  case tok::kw___auto_type:
+  case tok::kw__Bool:
+  case tok::annot_typename:
+  case tok::kw_char16_t:
+  case tok::kw_char32_t:
+  case tok::kw_typeof:
+  case tok::annot_decltype:
+  case tok::kw_decltype:
+  case tok::kw_char8_t:
+    return isKeyword(LangOpts);
+
+  default:
+    return false;
+  }
+}
+
 tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
   // We use a perfect hash function here involving the length of the keyword,
   // the first and third character.  For preprocessor ID's there are no
diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp
index b791c5a26bbe3a..a030d91d5589a5 100644
--- a/clang/lib/Format/FormatToken.cpp
+++ b/clang/lib/Format/FormatToken.cpp
@@ -34,41 +34,9 @@ const char *getTokenTypeName(TokenType Type) {
   return nullptr;
 }
 
-// FIXME: This is copy&pasted from Sema. Put it in a common place and remove
-// duplication.
 bool FormatToken::isSimpleTypeSpecifier() const {
-  switch (Tok.getKind()) {
-  case tok::kw_short:
-  case tok::kw_long:
-  case tok::kw___int64:
-  case tok::kw___int128:
-  case tok::kw_signed:
-  case tok::kw_unsigned:
-  case tok::kw_void:
-  case tok::kw_char:
-  case tok::kw_int:
-  case tok::kw_half:
-  case tok::kw_float:
-  case tok::kw_double:
-  case tok::kw___bf16:
-  case tok::kw__Float16:
-  case tok::kw___float128:
-  case tok::kw___ibm128:
-  case tok::kw_wchar_t:
-  case tok::kw_bool:
-#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
-#include "clang/Basic/TransformTypeTraits.def"
-  case tok::annot_typename:
-  case tok::kw_char8_t:
-  case tok::kw_char16_t:
-  case tok::kw_char32_t:
-  case tok::kw_typeof:
-  case tok::kw_decltype:
-  case tok::kw__Atomic:
-    return true;
-  default:
-    return false;
-  }
+  const auto *IdentifierInfo = Tok.getIdentifierInfo();
+  return IdentifierInfo && IdentifierInfo->isSimpleTypeSpecifier(LangOpts);
 }
 
 bool FormatToken::isTypeOrIdentifier() const {
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 52a55ea23b5f2f..e6575dd417438a 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -22,18 +22,20 @@
 namespace clang {
 namespace format {
 
+LangOptions LangOpts;
+
 FormatTokenLexer::FormatTokenLexer(
     const SourceManager &SourceMgr, FileID ID, unsigned Column,
     const FormatStyle &Style, encoding::Encoding Encoding,
     llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
     IdentifierTable &IdentTable)
     : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}),
-      Column(Column), TrailingWhitespace(0),
-      LangOpts(getFormattingLangOpts(Style)), SourceMgr(SourceMgr), ID(ID),
+      Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
       Style(Style), IdentTable(IdentTable), Keywords(IdentTable),
       Encoding(Encoding), Allocator(Allocator), FirstInLineIndex(0),
       FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),
       MacroBlockEndRegex(Style.MacroBlockEnd) {
+  LangOpts = getFormattingLangOpts(Style);
   Lex.reset(new Lexer(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts));
   Lex->SetKeepWhitespaceMode(true);
 
@@ -1411,7 +1413,6 @@ void FormatTokenLexer::readRawToken(FormatToken &Tok) {
 
 void FormatTokenLexer::resetLexer(unsigned Offset) {
   StringRef Buffer = SourceMgr.getBufferData(ID);
-  LangOpts = getFormattingLangOpts(Style);
   Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID), LangOpts,
                       Buffer.begin(), Buffer.begin() + Offset, Buffer.end()));
   Lex->SetKeepWhitespaceMode(true);
diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h
index 65dd733bd53352..52838f1d8a17f5 100644
--- a/clang/lib/Format/FormatTokenLexer.h
+++ b/clang/lib/Format/FormatTokenLexer.h
@@ -120,7 +120,6 @@ class FormatTokenLexer {
   unsigned Column;
   unsigned TrailingWhitespace;
   std::unique_ptr<Lexer> Lex;
-  LangOptions LangOpts;
   const SourceManager &SourceMgr;
   FileID ID;
   const FormatStyle &Style;
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index e862856a08ca11..8f9f918bf544fd 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -1597,7 +1597,8 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
       if (TryAnnotateTypeOrScopeToken())
         return ExprError();
 
-      if (!Actions.isSimpleTypeSpecifier(Tok.getKind()))
+      if (!Tok.getIdentifierInfo() ||
+          !Actions.isSimpleTypeSpecifier(*Tok.getIdentifierInfo()))
         // We are trying to parse a simple-type-specifier but might not get such
         // a token after error recovery.
         return ExprError();
diff --git a/clang/lib/Parse/ParseObjc.cpp b/clang/lib/Parse/ParseObjc.cpp
index 849fd1ac95a442..5565770610c491 100644
--- a/clang/lib/Parse/ParseObjc.cpp
+++ b/clang/lib/Parse/ParseObjc.cpp
@@ -2971,7 +2971,8 @@ bool Parser::ParseObjCXXMessageReceiver(bool &IsExpr, void *&TypeOrExpr) {
                   tok::annot_cxxscope))
     TryAnnotateTypeOrScopeToken();
 
-  if (!Actions.isSimpleTypeSpecifier(Tok.getKind())) {
+  if (!Tok.getIdentifierInfo() ||
+      !Actions.isSimpleTypeSpecifier(*Tok.getIdentifierInfo())) {
     //   objc-receiver:
     //     expression
     // Make sure any typos in the receiver are corrected or diagnosed, so that
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 8dff2cdc063df3..a20894adda00f9 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -128,52 +128,8 @@ class TypeNameValidatorCCC final : public CorrectionCandidateCallback {
 } // end anonymous namespace
 
 /// Determine whether the token kind starts a simple-type-specifier.
-bool Sema::isSimpleTypeSpecifier(tok::TokenKind Kind) const {
-  switch (Kind) {
-  // FIXME: Take into account the current language when deciding whether a
-  // token kind is a valid type specifier
-  case tok::kw_short:
-  case tok::kw_long:
-  case tok::kw___int64:
-  case tok::kw___int128:
-  case tok::kw_signed:
-  case tok::kw_unsigned:
-  case tok::kw_void:
-  case tok::kw_char:
-  case tok::kw_int:
-  case tok::kw_half:
-  case tok::kw_float:
-  case tok::kw_double:
-  case tok::kw___bf16:
-  case tok::kw__Float16:
-  case tok::kw___float128:
-  case tok::kw___ibm128:
-  case tok::kw_wchar_t:
-  case tok::kw_bool:
-  case tok::kw__Accum:
-  case tok::kw__Fract:
-  case tok::kw__Sat:
-#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
-#include "clang/Basic/TransformTypeTraits.def"
-  case tok::kw___auto_type:
-    return true;
-
-  case tok::annot_typename:
-  case tok::kw_char16_t:
-  case tok::kw_char32_t:
-  case tok::kw_typeof:
-  case tok::annot_decltype:
-  case tok::kw_decltype:
-    return getLangOpts().CPlusPlus;
-
-  case tok::kw_char8_t:
-    return getLangOpts().Char8;
-
-  default:
-    break;
-  }
-
-  return false;
+bool Sema::isSimpleTypeSpecifier(const IdentifierInfo &II) const {
+  return II.isSimpleTypeSpecifier(getLangOpts());
 }
 
 namespace {

@owenca owenca marked this pull request as draft January 23, 2024 10:36
@owenca owenca removed clang Clang issues not falling into any other category clang:frontend Language frontend issues, e.g. anything involving "Sema" labels Jan 23, 2024
So that it can be used by clang-format.
@owenca
Copy link
Contributor Author

owenca commented Feb 1, 2024

See #80241.

@owenca owenca closed this Feb 1, 2024
@owenca owenca deleted the isSimpleTypeSpecifier branch February 2, 2024 11:08
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

2 participants