From 0b4b6a654edf6fa2781e1c80983c124a0d0088c5 Mon Sep 17 00:00:00 2001 From: ThePhD Date: Thu, 28 Sep 2023 18:31:34 -0400 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20[Sema,=20Driver,=20Lex,=20Frontend]?= =?UTF-8?q?=20Implement=20naive=20#embed=20for=20C23=20and=20C++26.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🛠 [Frontend] Ensure commas inserted by #embed are properly serialized to output --- clang/CMakeLists.txt | 3 +- clang/include/clang/Basic/Builtins.def | 3 + clang/include/clang/Basic/DiagnosticGroups.td | 6 + .../include/clang/Basic/DiagnosticLexKinds.td | 24 +- clang/include/clang/Basic/FileManager.h | 8 +- clang/include/clang/Basic/TokenKinds.def | 7 + clang/include/clang/Driver/Options.td | 16 + .../Frontend/PreprocessorOutputOptions.h | 2 + clang/include/clang/Lex/PPCallbacks.h | 77 ++- clang/include/clang/Lex/Preprocessor.h | 66 ++- clang/include/clang/Lex/PreprocessorOptions.h | 7 + clang/lib/Basic/FileManager.cpp | 8 +- clang/lib/Basic/IdentifierTable.cpp | 3 +- clang/lib/Driver/ToolChains/Clang.cpp | 5 +- clang/lib/Format/FormatToken.h | 2 + clang/lib/Format/TokenAnnotator.cpp | 28 + clang/lib/Frontend/CompilerInvocation.cpp | 19 + clang/lib/Frontend/DependencyFile.cpp | 29 + clang/lib/Frontend/DependencyGraph.cpp | 43 +- clang/lib/Frontend/InitPreprocessor.cpp | 7 + .../lib/Frontend/PrintPreprocessedOutput.cpp | 25 +- .../Frontend/Rewrite/InclusionRewriter.cpp | 13 + clang/lib/Lex/PPCallbacks.cpp | 11 - clang/lib/Lex/PPDirectives.cpp | 500 ++++++++++++++++++ clang/lib/Lex/PPExpressions.cpp | 44 +- clang/lib/Lex/PPMacroExpansion.cpp | 120 +++++ clang/test/Preprocessor/Inputs/jk.txt | 1 + clang/test/Preprocessor/Inputs/media/art.txt | 9 + clang/test/Preprocessor/Inputs/media/empty | 0 .../test/Preprocessor/Inputs/single_byte.txt | 1 + clang/test/Preprocessor/embed___has_embed.c | 34 ++ .../embed___has_embed_supported.c | 24 + .../test/Preprocessor/embed_feature_test.cpp | 13 + 
.../test/Preprocessor/embed_file_not_found.c | 4 + clang/test/Preprocessor/embed_init.c | 28 + .../Preprocessor/embed_parameter_if_empty.c | 16 + .../test/Preprocessor/embed_parameter_limit.c | 15 + .../Preprocessor/embed_parameter_offset.c | 15 + .../Preprocessor/embed_parameter_prefix.c | 15 + .../Preprocessor/embed_parameter_suffix.c | 15 + .../embed_parameter_unrecognized.c | 8 + clang/test/Preprocessor/embed_path_chevron.c | 8 + clang/test/Preprocessor/embed_path_quote.c | 8 + clang/test/Preprocessor/single_byte.txt | 1 + llvm/CMakeLists.txt | 7 + llvm/cmake/modules/GetHostTriple.cmake | 6 +- 46 files changed, 1264 insertions(+), 40 deletions(-) create mode 100644 clang/test/Preprocessor/Inputs/jk.txt create mode 100644 clang/test/Preprocessor/Inputs/media/art.txt create mode 100644 clang/test/Preprocessor/Inputs/media/empty create mode 100644 clang/test/Preprocessor/Inputs/single_byte.txt create mode 100644 clang/test/Preprocessor/embed___has_embed.c create mode 100644 clang/test/Preprocessor/embed___has_embed_supported.c create mode 100644 clang/test/Preprocessor/embed_feature_test.cpp create mode 100644 clang/test/Preprocessor/embed_file_not_found.c create mode 100644 clang/test/Preprocessor/embed_init.c create mode 100644 clang/test/Preprocessor/embed_parameter_if_empty.c create mode 100644 clang/test/Preprocessor/embed_parameter_limit.c create mode 100644 clang/test/Preprocessor/embed_parameter_offset.c create mode 100644 clang/test/Preprocessor/embed_parameter_prefix.c create mode 100644 clang/test/Preprocessor/embed_parameter_suffix.c create mode 100644 clang/test/Preprocessor/embed_parameter_unrecognized.c create mode 100644 clang/test/Preprocessor/embed_path_chevron.c create mode 100644 clang/test/Preprocessor/embed_path_quote.c create mode 100644 clang/test/Preprocessor/single_byte.txt diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index 9b52c58be41e7..1b88905da3b85 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -300,6 
+300,7 @@ configure_file( ${CMAKE_CURRENT_BINARY_DIR}/include/clang/Basic/Version.inc) # Add appropriate flags for GCC +option(CLANG_ENABLE_PEDANTIC "Compile with pedantic enabled." ON) if (LLVM_COMPILER_IS_GCC_COMPATIBLE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-common -Woverloaded-virtual") if (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") @@ -307,7 +308,7 @@ if (LLVM_COMPILER_IS_GCC_COMPATIBLE) endif () # Enable -pedantic for Clang even if it's not enabled for LLVM. - if (NOT LLVM_ENABLE_PEDANTIC) + if (NOT LLVM_ENABLE_PEDANTIC AND CLANG_ENABLE_PEDANTIC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic -Wno-long-long") endif () diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index 6ea8484606cfd..0dfc6456daf05 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -1766,6 +1766,9 @@ BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n") // Arithmetic Fence: to prevent FP reordering and reassociation optimizations LANGBUILTIN(__arithmetic_fence, "v.", "tE", ALL_LANGUAGES) +// preprocessor embed builtin +LANGBUILTIN(__builtin_pp_embed, "v.", "tE", ALL_LANGUAGES) + #undef BUILTIN #undef LIBBUILTIN #undef LANGBUILTIN diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 0b09c00219184..89f6715cebfdc 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -708,6 +708,12 @@ def ReservedIdAsMacro : DiagGroup<"reserved-macro-identifier">; def ReservedIdAsMacroAlias : DiagGroup<"reserved-id-macro", [ReservedIdAsMacro]>; def RestrictExpansionMacro : DiagGroup<"restrict-expansion">; def FinalMacro : DiagGroup<"final-macro">; +// Warnings about unknown preprocessor parameters (e.g. 
`#embed` and extensions) +def UnsupportedDirective : DiagGroup<"unsupported-directive">; +def UnknownDirectiveParameters : DiagGroup<"unknown-directive-parameters">; +def IgnoredDirectiveParameters : DiagGroup<"ignored-directive-parameters">; +def DirectiveParameters : DiagGroup<"directive-parameters", + [UnknownDirectiveParameters, IgnoredDirectiveParameters]>; // Just silence warnings about -Wstrict-aliasing for now. def : DiagGroup<"strict-aliasing=0">; diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 940cca6736849..4490f40806b03 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -422,6 +422,22 @@ def warn_cxx23_compat_warning_directive : Warning< def warn_c23_compat_warning_directive : Warning< "#warning is incompatible with C standards before C23">, InGroup, DefaultIgnore; +def warn_c23_pp_embed : Warning< + "'#embed' is a C23 extension">, + InGroup, + DefaultIgnore; +def warn_c23_pp_has_embed : Warning< + "'__has_embed' is a C23 extension">, + InGroup, + DefaultIgnore; +def warn_cxx26_pp_embed : Warning< + "'#embed' is a C++26 extension">, + InGroup, + DefaultIgnore; +def warn_cxx26_pp_has_embed : Warning< + "'__has_embed' is a C++26 extension">, + InGroup, + DefaultIgnore; def ext_pp_extra_tokens_at_eol : ExtWarn< "extra tokens at end of #%0 directive">, InGroup; @@ -483,7 +499,13 @@ def ext_pp_gnu_line_directive : Extension< def err_pp_invalid_directive : Error< "invalid preprocessing directive%select{|, did you mean '#%1'?}0">; def warn_pp_invalid_directive : Warning< - err_pp_invalid_directive.Summary>, InGroup>; + err_pp_invalid_directive.Summary>, + InGroup; +def warn_pp_unknown_parameter_ignored : Warning< + "unknown%select{| embed}0 preprocessor parameter '%1' ignored">, + InGroup; +def err_pp_unsupported_directive : Error< + "unsupported%select{| embed}0 directive: %1">; def err_pp_directive_required : 
Error< "%0 must be used within a preprocessing directive">; def err_pp_file_not_found : Error<"'%0' file not found">, DefaultFatal; diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h index 56cb093dd8c37..c757f8775b425 100644 --- a/clang/include/clang/Basic/FileManager.h +++ b/clang/include/clang/Basic/FileManager.h @@ -276,11 +276,13 @@ class FileManager : public RefCountedBase { /// MemoryBuffer if successful, otherwise returning null. llvm::ErrorOr> getBufferForFile(FileEntryRef Entry, bool isVolatile = false, - bool RequiresNullTerminator = true); + bool RequiresNullTerminator = true, + std::optional MaybeLimit = std::nullopt); llvm::ErrorOr> getBufferForFile(StringRef Filename, bool isVolatile = false, - bool RequiresNullTerminator = true) { - return getBufferForFileImpl(Filename, /*FileSize=*/-1, isVolatile, + bool RequiresNullTerminator = true, + std::optional MaybeLimit = std::nullopt) { + return getBufferForFileImpl(Filename, /*FileSize=*/(MaybeLimit ? *MaybeLimit : -1), isVolatile, RequiresNullTerminator); } diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 94db56a9fd5d7..19a66fbb07311 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -126,6 +126,9 @@ PPKEYWORD(error) // C99 6.10.6 - Pragma Directive. PPKEYWORD(pragma) +// C23 & C++26 #embed +PPKEYWORD(embed) + // GNU Extensions. PPKEYWORD(import) PPKEYWORD(include_next) @@ -151,6 +154,10 @@ TOK(eod) // End of preprocessing directive (end of line inside a // directive). TOK(code_completion) // Code completion marker +// #embed speed support +TOK(builtin_embed) + + // C99 6.4.9: Comments. 
TOK(comment) // Comment (only in -E -C[C] mode) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 5415b18d3f406..bfc4b15d5411c 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -114,6 +114,11 @@ def IncludePath_Group : OptionGroup<"">, Group, DocBrief<[{ Flags controlling how ``#include``\s are resolved to files.}]>; +def EmbedPath_Group : OptionGroup<"">, Group, + DocName<"Embed path management">, + DocBrief<[{ +Flags controlling how ``#embed``\s and similar are resolved to files.}]>; + def I_Group : OptionGroup<"">, Group, DocFlatten; def i_Group : OptionGroup<"">, Group, DocFlatten; def clang_i_Group : OptionGroup<"">, Group, DocFlatten; @@ -816,6 +821,14 @@ will be ignored}]>; def L : JoinedOrSeparate<["-"], "L">, Flags<[RenderJoined]>, Group, Visibility<[ClangOption, FlangOption]>, MetaVarName<"">, HelpText<"Add directory to library search path">; +def embed_dir : JoinedOrSeparate<["-"], "embed-dir">, + Flags<[RenderJoined]>, Group, + Visibility<[ClangOption, CC1Option, CC1AsOption, FlangOption, FC1Option]>, + MetaVarName<"">, HelpText<"Add directory to embed search path">; +def embed_dir_EQ : JoinedOrSeparate<["-"], "embed-dir=">, + Flags<[RenderJoined]>, Group, + Visibility<[ClangOption, CC1Option, CC1AsOption, FlangOption, FC1Option]>, + MetaVarName<"">, HelpText<"Add directory to embed search path">; def MD : Flag<["-"], "MD">, Group, HelpText<"Write a depfile containing user and system headers">; def MMD : Flag<["-"], "MMD">, Group, @@ -1353,6 +1366,9 @@ def dD : Flag<["-"], "dD">, Group, Visibility<[ClangOption, CC1Option]> def dI : Flag<["-"], "dI">, Group, Visibility<[ClangOption, CC1Option]>, HelpText<"Print include directives in -E mode in addition to normal output">, MarshallingInfoFlag>; +def dE : Flag<["-"], "dE">, Group, Visibility<[ClangOption, CC1Option]>, + HelpText<"Print embed directives in -E mode in addition to normal output">, + 
MarshallingInfoFlag>; def dM : Flag<["-"], "dM">, Group, Visibility<[ClangOption, CC1Option]>, HelpText<"Print macro definitions in -E mode instead of normal output">; def dead__strip : Flag<["-"], "dead_strip">; diff --git a/clang/include/clang/Frontend/PreprocessorOutputOptions.h b/clang/include/clang/Frontend/PreprocessorOutputOptions.h index db2ec9f2ae206..3e36db3f8ce46 100644 --- a/clang/include/clang/Frontend/PreprocessorOutputOptions.h +++ b/clang/include/clang/Frontend/PreprocessorOutputOptions.h @@ -22,6 +22,7 @@ class PreprocessorOutputOptions { unsigned ShowMacroComments : 1; ///< Show comments, even in macros. unsigned ShowMacros : 1; ///< Print macro definitions. unsigned ShowIncludeDirectives : 1; ///< Print includes, imports etc. within preprocessed output. + unsigned ShowEmbedDirectives : 1; ///< Print embeds, etc. within preprocessed output. unsigned RewriteIncludes : 1; ///< Preprocess include directives only. unsigned RewriteImports : 1; ///< Include contents of transitively-imported modules. unsigned MinimizeWhitespace : 1; ///< Ignore whitespace from input. @@ -37,6 +38,7 @@ class PreprocessorOutputOptions { ShowMacroComments = 0; ShowMacros = 0; ShowIncludeDirectives = 0; + ShowEmbedDirectives = 0; RewriteIncludes = 0; RewriteImports = 0; MinimizeWhitespace = 0; diff --git a/clang/include/clang/Lex/PPCallbacks.h b/clang/include/clang/Lex/PPCallbacks.h index 94f96cf9c5125..921bf159ead57 100644 --- a/clang/include/clang/Lex/PPCallbacks.h +++ b/clang/include/clang/Lex/PPCallbacks.h @@ -83,6 +83,47 @@ class PPCallbacks { const Token &FilenameTok, SrcMgr::CharacteristicKind FileType) {} + /// Callback invoked whenever the preprocessor cannot find a file for an + /// embed directive. + /// + /// \param FileName The name of the file being included, as written in the + /// source code. + /// + /// \returns true to indicate that the preprocessor should skip this file + /// and not issue any diagnostic. 
+ virtual bool EmbedFileNotFound(StringRef FileName) { return false; } + + /// Callback invoked whenever an embed directive has been processed, + /// regardless of whether the embed will actually find a file. + /// + /// \param HashLoc The location of the '#' that starts the embed directive. + /// + /// \param FileName The name of the file being included, as written in the + /// source code. + /// + /// \param IsAngled Whether the file name was enclosed in angle brackets; + /// otherwise, it was enclosed in quotes. + /// + /// \param FilenameRange The character range of the quotes or angle brackets + /// for the written file name. + /// + /// \param ParametersRange The character range of the embed parameters. An + /// empty range if there were no parameters. + /// + /// \param File The actual file that may be included by this embed directive. + /// + /// \param SearchPath Contains the search path which was used to find the file + /// in the file system. If the file was found via an absolute path, + /// SearchPath will be empty. + /// + /// \param RelativePath The path relative to SearchPath, at which the resource + /// file was found. This is equal to FileName. + virtual void EmbedDirective(SourceLocation HashLoc, StringRef FileName, + bool IsAngled, CharSourceRange FilenameRange, + CharSourceRange ParametersRange, + OptionalFileEntryRef File, StringRef SearchPath, + StringRef RelativePath) {} + /// Callback invoked whenever the preprocessor cannot find a file for an /// inclusion directive. /// @@ -330,11 +371,15 @@ class PPCallbacks { SourceRange Range) { } + /// Hook called when a '__has_embed' directive is read. + virtual void HasEmbed(SourceLocation Loc, StringRef FileName, bool IsAngled, + OptionalFileEntryRef File) {} + /// Hook called when a '__has_include' or '__has_include_next' directive is /// read. 
virtual void HasInclude(SourceLocation Loc, StringRef FileName, bool IsAngled, OptionalFileEntryRef File, - SrcMgr::CharacteristicKind FileType); + SrcMgr::CharacteristicKind FileType) {} /// Hook called when a source range is skipped. /// \param Range The SourceRange that was skipped. The range begins at the @@ -461,6 +506,25 @@ class PPChainedCallbacks : public PPCallbacks { Second->FileSkipped(SkippedFile, FilenameTok, FileType); } + bool EmbedFileNotFound(StringRef FileName) override { + bool Skip = First->EmbedFileNotFound(FileName); + // Make sure to invoke the second callback, no matter if the first already + // returned true to skip the file. + Skip |= Second->EmbedFileNotFound(FileName); + return Skip; + } + + void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled, + CharSourceRange FilenameRange, + CharSourceRange ParametersRange, + OptionalFileEntryRef File, StringRef SearchPath, + StringRef RelativePath) override { + First->EmbedDirective(HashLoc, FileName, IsAngled, FilenameRange, + ParametersRange, File, SearchPath, RelativePath); + Second->EmbedDirective(HashLoc, FileName, IsAngled, FilenameRange, + ParametersRange, File, SearchPath, RelativePath); + } + bool FileNotFound(StringRef FileName) override { bool Skip = First->FileNotFound(FileName); // Make sure to invoke the second callback, no matter if the first already @@ -561,9 +625,18 @@ class PPChainedCallbacks : public PPCallbacks { Second->PragmaDiagnostic(Loc, Namespace, mapping, Str); } + void HasEmbed(SourceLocation Loc, StringRef FileName, bool IsAngled, + OptionalFileEntryRef File) override { + First->HasEmbed(Loc, FileName, IsAngled, File); + Second->HasEmbed(Loc, FileName, IsAngled, File); + } + void HasInclude(SourceLocation Loc, StringRef FileName, bool IsAngled, OptionalFileEntryRef File, - SrcMgr::CharacteristicKind FileType) override; + SrcMgr::CharacteristicKind FileType) override { + First->HasInclude(Loc, FileName, IsAngled, File, FileType); + Second->HasInclude(Loc, 
FileName, IsAngled, File, FileType); + } void PragmaOpenCLExtension(SourceLocation NameLoc, const IdentifierInfo *Name, SourceLocation StateLoc, unsigned State) override { diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 18d88407ae12c..7470bf5882730 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -31,6 +31,7 @@ #include "clang/Lex/PPCallbacks.h" #include "clang/Lex/Token.h" #include "clang/Lex/TokenLexer.h" +#include "llvm/ADT/APSInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" @@ -53,6 +54,7 @@ #include #include #include +#include #include namespace llvm { @@ -165,6 +167,7 @@ class Preprocessor { IdentifierInfo *Ident__has_builtin; // __has_builtin IdentifierInfo *Ident__has_constexpr_builtin; // __has_constexpr_builtin IdentifierInfo *Ident__has_attribute; // __has_attribute + IdentifierInfo *Ident__has_embed; // __has_embed IdentifierInfo *Ident__has_include; // __has_include IdentifierInfo *Ident__has_include_next; // __has_include_next IdentifierInfo *Ident__has_warning; // __has_warning @@ -206,7 +209,10 @@ class Preprocessor { enum { /// Maximum depth of \#includes. - MaxAllowedIncludeStackDepth = 200 + MaxAllowedIncludeStackDepth = 200, + VALUE__STDC_EMBED_NOT_FOUND__ = 0, + VALUE__STDC_EMBED_FOUND__ = 1, + VALUE__STDC_EMBED_EMPTY__ = 2, }; // State that is set before the preprocessor begins. @@ -1728,6 +1734,22 @@ class Preprocessor { /// Lex a token, forming a header-name token if possible. 
bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true); + struct LexEmbedParametersResult { + bool Successful; + std::optional MaybeLimitParam; + std::optional MaybeOffsetParam; + std::optional> MaybeIfEmptyParam; + std::optional> MaybePrefixParam; + std::optional> MaybeSuffixParam; + int UnrecognizedParams; + SourceLocation StartLoc; + SourceLocation EndLoc; + }; + + LexEmbedParametersResult LexEmbedParameters(Token &Current, + bool InHasEmbed = false, + bool DiagnoseUnknown = true); + bool LexAfterModuleImport(Token &Result); void CollectPpImportSuffix(SmallVectorImpl &Toks); @@ -2413,6 +2435,17 @@ class Preprocessor { bool *IsFrameworkFound, bool SkipCache = false, bool OpenFile = true, bool CacheFailures = true); + /// Given a "foo" or \ reference, look up the indicated embed resource. + /// + /// Returns std::nullopt on failure. \p isAngled indicates whether the file + /// reference is for system \#include's or not (i.e. using <> instead of ""). + OptionalFileEntryRef + LookupEmbedFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled, + bool OpenFile, + const FileEntry *LookupFromFile = nullptr, + SmallVectorImpl *SearchPath = nullptr, + SmallVectorImpl *RelativePath = nullptr); + /// Return true if we're in the top-level file, not in a \#include. bool isInPrimaryFile() const; @@ -2517,6 +2550,9 @@ class Preprocessor { /// Information about the result for evaluating an expression for a /// preprocessor directive. struct DirectiveEvalResult { + /// The integral value of the expression. + std::optional Value; + /// Whether the expression was evaluated as true or not. bool Conditional; @@ -2531,7 +2567,24 @@ class Preprocessor { /// \#if or \#elif directive and return a \p DirectiveEvalResult object. /// /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro. 
- DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro); + DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro, + bool CheckForEoD = true, + bool Parenthesized = false); + + /// Evaluate an integer constant expression that may occur after a + /// \#if or \#elif directive and return a \p DirectiveEvalResult object. + /// + /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro. + DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro, + Token &Tok, + bool CheckForEoD = true, + bool Parenthesized = false); + + /// Process a '__has_embed("path" [, ...])' expression. + /// + /// Returns predefined `__STDC_EMBED_*` macro values if + /// successful. + int EvaluateHasEmbed(Token &Tok, IdentifierInfo *II); /// Process a '__has_include("path")' expression. /// @@ -2679,6 +2732,15 @@ class Preprocessor { const FileEntry *LookupFromFile, StringRef &LookupFilename, SmallVectorImpl &RelativePath, SmallVectorImpl &SearchPath, ModuleMap::KnownHeader &SuggestedModule, bool isAngled); + // Binary data inclusion + void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok, + const FileEntry *LookupFromFile = nullptr); + void HandleEmbedDirectiveNaive( + SourceLocation FilenameTok, LexEmbedParametersResult &Params, + StringRef BinaryContents, const size_t TargetCharWidth); + void HandleEmbedDirectiveBuiltin( + SourceLocation FilenameTok, LexEmbedParametersResult &Params, + StringRef BinaryContents, const size_t TargetCharWidth); // File inclusion. void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok, diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h index 058194bcde72e..23f3458d79e03 100644 --- a/clang/include/clang/Lex/PreprocessorOptions.h +++ b/clang/include/clang/Lex/PreprocessorOptions.h @@ -167,6 +167,13 @@ class PreprocessorOptions { /// of the specified memory buffer (the second part of each pair). 
std::vector> RemappedFileBuffers; + /// User specified embed entries. + std::vector EmbedEntries; + + /// Whether or not naive expansion should be used all the time for + /// builtin embed + bool NoBuiltinPPEmbed = false; + /// Whether the compiler instance should retain (i.e., not free) /// the buffers associated with remapped files. /// diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp index d16626b106521..e0e80b5e0fbed 100644 --- a/clang/lib/Basic/FileManager.cpp +++ b/clang/lib/Basic/FileManager.cpp @@ -537,13 +537,19 @@ void FileManager::fillRealPathName(FileEntry *UFE, llvm::StringRef FileName) { llvm::ErrorOr> FileManager::getBufferForFile(FileEntryRef FE, bool isVolatile, - bool RequiresNullTerminator) { + bool RequiresNullTerminator, + std::optional MaybeLimit) { const FileEntry *Entry = &FE.getFileEntry(); // If the content is living on the file entry, return a reference to it. if (Entry->Content) return llvm::MemoryBuffer::getMemBuffer(Entry->Content->getMemBufferRef()); uint64_t FileSize = Entry->getSize(); + + if (MaybeLimit) + FileSize = *MaybeLimit; + + // If there's a high enough chance that the file have changed since we // got its size, force a stat before opening it. if (isVolatile || Entry->isNamedPipe()) diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index e5599d5455410..d2b5426d27bb3 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -423,7 +423,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { // case values). Note that this depends on 'if' being null terminated. #define HASH(LEN, FIRST, THIRD) \ - (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31) + (LEN << 6) + (((FIRST-'a') - (THIRD-'a')) & 63) #define CASE(LEN, FIRST, THIRD, NAME) \ case HASH(LEN, FIRST, THIRD): \ return memcmp(Name, #NAME, LEN) ? 
tok::pp_not_keyword : tok::pp_ ## NAME @@ -438,6 +438,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { CASE( 4, 'e', 's', else); CASE( 4, 'l', 'n', line); CASE( 4, 's', 'c', sccs); + CASE( 5, 'e', 'b', embed); CASE( 5, 'e', 'd', endif); CASE( 5, 'e', 'r', error); CASE( 5, 'i', 'e', ident); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index bfd6c5c2864ab..a8d51179a9ba5 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1324,7 +1324,7 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA, Args.AddAllArgs(CmdArgs, {options::OPT_D, options::OPT_U, options::OPT_I_Group, - options::OPT_F, options::OPT_index_header_map}); + options::OPT_F, options::OPT_index_header_map, options::OPT_EmbedPath_Group}); // Add -Wp, and -Xpreprocessor if using the preprocessor. @@ -8182,6 +8182,9 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, // Pass along any -I options so we get proper .include search paths. Args.AddAllArgs(CmdArgs, options::OPT_I_Group); + // Pass along any -embed-dir or similar options so we get proper embed paths. + Args.AddAllArgs(CmdArgs, options::OPT_EmbedPath_Group); + // Determine the original source input. 
auto FindSource = [](const Action *S) -> const Action * { while (S->getKind() != Action::InputClass) { diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index 527f1d744a580..97fd574655856 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -1006,6 +1006,7 @@ struct AdditionalKeywords { kw_synchronized = &IdentTable.get("synchronized"); kw_throws = &IdentTable.get("throws"); kw___except = &IdentTable.get("__except"); + kw___has_embed = &IdentTable.get("__has_embed"); kw___has_include = &IdentTable.get("__has_include"); kw___has_include_next = &IdentTable.get("__has_include_next"); @@ -1303,6 +1304,7 @@ struct AdditionalKeywords { IdentifierInfo *kw_NS_ERROR_ENUM; IdentifierInfo *kw_NS_OPTIONS; IdentifierInfo *kw___except; + IdentifierInfo *kw___has_embed; IdentifierInfo *kw___has_include; IdentifierInfo *kw___has_include_next; diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 543c119620bf2..e405a9085951d 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1400,6 +1400,9 @@ class AnnotatingParser { Keywords.kw___has_include_next)) { parseHasInclude(); } + else if (Tok->is(Keywords.kw___has_embed)) { + parseHasEmbed(); + } if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next && Tok->Next->isNot(tok::l_paren)) { Tok->setType(TT_CSharpGenericTypeConstraint); @@ -1464,6 +1467,21 @@ class AnnotatingParser { } } + void parseEmbedDirective() { + if (CurrentToken && CurrentToken->is(tok::less)) { + next(); + while (CurrentToken) { + // Mark tokens up to the trailing line comments as implicit string + // literals. 
+ if (CurrentToken->isNot(tok::comment) && + !CurrentToken->TokenText.startswith("//")) { + CurrentToken->setType(TT_ImplicitStringLiteral); + } + next(); + } + } + } + void parseWarningOrError() { next(); // We still want to format the whitespace left of the first token of the @@ -1500,6 +1518,14 @@ class AnnotatingParser { next(); // ')' } + void parseHasEmbed() { + if (!CurrentToken || CurrentToken->isNot(tok::l_paren)) + return; + next(); // '(' + parseEmbedDirective(); + next(); // ')' + } + LineType parsePreprocessorDirective() { bool IsFirstToken = CurrentToken->IsFirst; LineType Type = LT_PreprocessorDirective; @@ -1563,6 +1589,8 @@ class AnnotatingParser { } else if (Tok->isOneOf(Keywords.kw___has_include, Keywords.kw___has_include_next)) { parseHasInclude(); + } else if (Tok->is(Keywords.kw___has_embed)) { + parseHasEmbed(); } } return Type; diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index bb442495f5835..05406b5d42d73 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -4302,6 +4302,12 @@ static void GeneratePreprocessorArgs(const PreprocessorOptions &Opts, if (Opts.SourceDateEpoch) GenerateArg(Consumer, OPT_source_date_epoch, Twine(*Opts.SourceDateEpoch)); + for (const auto &EmbedEntry : Opts.EmbedEntries) + GenerateArg(Consumer, OPT_embed_dir, EmbedEntry); + + if (Opts.NoBuiltinPPEmbed) + GenerateArg(Consumer, OPT_fno_builtin_, "pp_embed"); + // Don't handle LexEditorPlaceholders. It is implied by the action that is // generated elsewhere. 
} @@ -4394,6 +4400,19 @@ static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args, } } + for (const auto *A : Args.filtered(OPT_embed_dir, OPT_embed_dir_EQ)) { + StringRef Val = A->getValue(); + Opts.EmbedEntries.push_back(std::string(Val)); + } + + // Can disable the internal embed builtin / token + for (const auto *A : Args.filtered(OPT_fno_builtin_)) { + StringRef Val = A->getValue(); + if (Val == "pp_embed") { + Opts.NoBuiltinPPEmbed = true; + } + } + // Always avoid lexing editor placeholders when we're just running the // preprocessor as we never want to emit the // "editor placeholder in source file" error in PP only mode. diff --git a/clang/lib/Frontend/DependencyFile.cpp b/clang/lib/Frontend/DependencyFile.cpp index c2f6f41ae291e..10558b1d34bf6 100644 --- a/clang/lib/Frontend/DependencyFile.cpp +++ b/clang/lib/Frontend/DependencyFile.cpp @@ -65,6 +65,21 @@ struct DepCollectorPPCallbacks : public PPCallbacks { /*IsMissing=*/false); } + void EmbedDirective(SourceLocation HashLoc, + StringRef FileName, bool IsAngled, + CharSourceRange FilenameRange, CharSourceRange ParametersRange, + OptionalFileEntryRef File, StringRef SearchPath, + StringRef RelativePath) override { + if (!File) + DepCollector.maybeAddDependency(FileName, + /*FromModule*/ false, + /*IsSystem*/ false, + /*IsModuleFile*/ false, + &PP.getFileManager(), + /*IsMissing*/ true); + // Files that actually exist are handled by FileChanged. + } + void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, CharSourceRange FilenameRange, @@ -81,6 +96,20 @@ struct DepCollectorPPCallbacks : public PPCallbacks { // Files that actually exist are handled by FileChanged. 
+ void HasEmbed(SourceLocation Loc, StringRef SpelledFilename, bool IsAngled, + OptionalFileEntryRef File) override { + if (!File) + return; + StringRef Filename = + llvm::sys::path::remove_leading_dotslash(File->getName()); + DepCollector.maybeAddDependency(Filename, + /*FromModule=*/false, + /*IsSystem=*/false, + /*IsModuleFile=*/false, + &PP.getFileManager(), + /*IsMissing=*/false); + } + void HasInclude(SourceLocation Loc, StringRef SpelledFilename, bool IsAngled, OptionalFileEntryRef File, SrcMgr::CharacteristicKind FileType) override { diff --git a/clang/lib/Frontend/DependencyGraph.cpp b/clang/lib/Frontend/DependencyGraph.cpp index 6aad04370f6e7..683f751a94244 100644 --- a/clang/lib/Frontend/DependencyGraph.cpp +++ b/clang/lib/Frontend/DependencyGraph.cpp @@ -26,6 +26,14 @@ namespace DOT = llvm::DOT; namespace { class DependencyGraphCallback : public PPCallbacks { +public: + enum DirectiveBehavior { + Normal = 0, + IgnoreEmbed = 0b01, + IgnoreInclude = 0b10, + }; + +private: const Preprocessor *PP; std::string OutputFile; std::string SysRoot; @@ -34,6 +42,7 @@ class DependencyGraphCallback : public PPCallbacks { llvm::DenseMap>; DependencyMap Dependencies; + DirectiveBehavior Behavior; private: raw_ostream &writeNodeReference(raw_ostream &OS, @@ -42,7 +51,8 @@ class DependencyGraphCallback : public PPCallbacks { public: DependencyGraphCallback(const Preprocessor *_PP, StringRef OutputFile, - StringRef SysRoot) + StringRef SysRoot, + DirectiveBehavior Action = IgnoreEmbed) - : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()) { } + : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()), Behavior(Action) { } void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, @@ -52,6 +62,12 @@ class DependencyGraphCallback : public PPCallbacks { StringRef RelativePath, const Module *Imported, SrcMgr::CharacteristicKind FileType) override; + void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled, + CharSourceRange FilenameRange, + CharSourceRange ParametersRange, + OptionalFileEntryRef File, StringRef 
SearchPath, + StringRef RelativePath) override; + void EndOfMainFile() override { OutputGraphFile(); } @@ -70,6 +86,31 @@ void DependencyGraphCallback::InclusionDirective( bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File, StringRef SearchPath, StringRef RelativePath, const Module *Imported, SrcMgr::CharacteristicKind FileType) { + if ((Behavior & IgnoreInclude) == IgnoreInclude) { + return; + } + if (!File) + return; + + SourceManager &SM = PP->getSourceManager(); + OptionalFileEntryRef FromFile = + SM.getFileEntryRefForID(SM.getFileID(SM.getExpansionLoc(HashLoc))); + if (!FromFile) + return; + + Dependencies[*FromFile].push_back(*File); + + AllFiles.insert(*File); + AllFiles.insert(*FromFile); +} + +void DependencyGraphCallback::EmbedDirective( + SourceLocation HashLoc, StringRef FileName, bool IsAngled, + CharSourceRange FilenameRange, CharSourceRange ParametersRange, + OptionalFileEntryRef File, StringRef SearchPath, StringRef RelativePath) { + if ((Behavior & IgnoreEmbed) == IgnoreEmbed) { + return; + } if (!File) return; diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 846e5fce6de7b..b7d084773b0a1 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -498,6 +498,11 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, Builder.defineMacro("__STDC_UTF_16__", "1"); Builder.defineMacro("__STDC_UTF_32__", "1"); + // __has_embed definitions + Builder.defineMacro("__STDC_EMBED_NOT_FOUND__", "0"); + Builder.defineMacro("__STDC_EMBED_FOUND__", "1"); + Builder.defineMacro("__STDC_EMBED_EMPTY__", "2"); + if (LangOpts.ObjC) Builder.defineMacro("__OBJC__"); @@ -729,6 +734,8 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts, if (LangOpts.Char8) Builder.defineMacro("__cpp_char8_t", "202207L"); Builder.defineMacro("__cpp_impl_destroying_delete", "201806L"); + + Builder.defineMacro("__cpp_pp_embed", 
"202403L"); } /// InitializeOpenCLFeatureTestMacros - Define OpenCL macros based on target diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp index 7f5f669068230..fb9baa92e6836 100644 --- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp +++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp @@ -93,6 +93,7 @@ class PrintPPOutputPPCallbacks : public PPCallbacks { bool DisableLineMarkers; bool DumpDefines; bool DumpIncludeDirectives; + bool DumpEmbedDirectives; bool UseLineDirectives; bool IsFirstFileEntered; bool MinimizeWhitespace; @@ -106,12 +107,13 @@ class PrintPPOutputPPCallbacks : public PPCallbacks { public: PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream *os, bool lineMarkers, - bool defines, bool DumpIncludeDirectives, + bool defines, bool DumpIncludeDirectives, bool DumpEmbedDirectives, bool UseLineDirectives, bool MinimizeWhitespace, bool DirectivesOnly, bool KeepSystemIncludes) : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os), DisableLineMarkers(lineMarkers), DumpDefines(defines), DumpIncludeDirectives(DumpIncludeDirectives), + DumpEmbedDirectives(DumpEmbedDirectives), UseLineDirectives(UseLineDirectives), MinimizeWhitespace(MinimizeWhitespace), DirectivesOnly(DirectivesOnly), KeepSystemIncludes(KeepSystemIncludes), OrigOS(os) { @@ -149,6 +151,11 @@ class PrintPPOutputPPCallbacks : public PPCallbacks { void FileChanged(SourceLocation Loc, FileChangeReason Reason, SrcMgr::CharacteristicKind FileType, FileID PrevFID) override; + void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled, + CharSourceRange FilenameRange, + CharSourceRange ParametersRange, + OptionalFileEntryRef File, StringRef SearchPath, + StringRef RelativePath) override; void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, CharSourceRange FilenameRange, @@ -398,6 +405,20 @@ void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc, } } 
+void PrintPPOutputPPCallbacks::EmbedDirective(
+    SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+    CharSourceRange FilenameRange, CharSourceRange ParametersRange,
+    OptionalFileEntryRef File, StringRef SearchPath, StringRef RelativePath) {
+  // When DumpEmbedDirectives is set (the -dE marker below), echo the #embed
+  // directive itself; only the file name is printed, not its parameters.
+  if (DumpEmbedDirectives) {
+    MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
+    *OS << "#embed " << (IsAngled ? '<' : '"') << FileName
+        << (IsAngled ? '>' : '"') << " /* clang -E -dE */";
+    setEmittedDirectiveOnThisLine();
+  }
+}
+
 void PrintPPOutputPPCallbacks::InclusionDirective(
     SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
     bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
@@ -981,7 +1002,7 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
   PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
       PP, OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
-      Opts.ShowIncludeDirectives, Opts.UseLineDirectives,
+      Opts.ShowIncludeDirectives, Opts.ShowEmbedDirectives, Opts.UseLineDirectives,
       Opts.MinimizeWhitespace, Opts.DirectivesOnly, Opts.KeepSystemIncludes);
   // Expand macros in pragmas with -fms-extensions.
The assumption is that diff --git a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp index 28f7b0b9edfc5..4a73946951fd9 100644 --- a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp +++ b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp @@ -71,6 +71,11 @@ class InclusionRewriter : public PPCallbacks { FileID PrevFID) override; void FileSkipped(const FileEntryRef &SkippedFile, const Token &FilenameTok, SrcMgr::CharacteristicKind FileType) override; + void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled, + CharSourceRange FilenameRange, + CharSourceRange ParametersRange, + OptionalFileEntryRef File, StringRef SearchPath, + StringRef RelativePath) override; void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, CharSourceRange FilenameRange, @@ -177,6 +182,14 @@ void InclusionRewriter::FileSkipped(const FileEntryRef & /*SkippedFile*/, LastInclusionLocation = SourceLocation(); } +/// This should be called whenever the preprocessor encounters embed +/// directives. +void InclusionRewriter::EmbedDirective( + SourceLocation /*HashLoc*/, StringRef /*FileName*/, bool /*IsAngled*/, + CharSourceRange /*FilenameRange*/, CharSourceRange /*ParametersRange*/, + OptionalFileEntryRef /*File*/, StringRef /*SearchPath*/, + StringRef /*RelativePath*/) {} + /// This should be called whenever the preprocessor encounters include /// directives. It does not say whether the file has been included, but it /// provides more information about the directive (hash location instead diff --git a/clang/lib/Lex/PPCallbacks.cpp b/clang/lib/Lex/PPCallbacks.cpp index f2b60a728e901..ea5dce2c27a58 100644 --- a/clang/lib/Lex/PPCallbacks.cpp +++ b/clang/lib/Lex/PPCallbacks.cpp @@ -14,16 +14,5 @@ using namespace clang; // Out of line key method. 
PPCallbacks::~PPCallbacks() = default; -void PPCallbacks::HasInclude(SourceLocation Loc, StringRef FileName, - bool IsAngled, OptionalFileEntryRef File, - SrcMgr::CharacteristicKind FileType) {} - // Out of line key method. PPChainedCallbacks::~PPChainedCallbacks() = default; - -void PPChainedCallbacks::HasInclude(SourceLocation Loc, StringRef FileName, - bool IsAngled, OptionalFileEntryRef File, - SrcMgr::CharacteristicKind FileType) { - First->HasInclude(Loc, FileName, IsAngled, File, FileType); - Second->HasInclude(Loc, FileName, IsAngled, File, FileType); -} diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index e3065c17dc70b..e0d98d7ca03fa 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -18,7 +18,9 @@ #include "clang/Basic/Module.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" +#include "clang/Basic/TargetInfo.h" #include "clang/Basic/TokenKinds.h" +#include "clang/Frontend/FrontendOptions.h" #include "clang/Lex/CodeCompletionHandler.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/LexDiagnostic.h" @@ -1079,6 +1081,101 @@ OptionalFileEntryRef Preprocessor::LookupFile( return std::nullopt; } +OptionalFileEntryRef Preprocessor::LookupEmbedFile( + SourceLocation FilenameLoc, StringRef Filename, bool isAngled, + bool OpenFile, const FileEntry *LookupFromFile, + SmallVectorImpl *SearchPath, SmallVectorImpl *RelativePath) { + FileManager &FM = this->getFileManager(); + if (llvm::sys::path::is_absolute(Filename)) { + // lookup path or immediately fail + llvm::Expected ShouldBeEntry = + FM.getFileRef(Filename, true, OpenFile); + return llvm::expectedToOptional(std::move(ShouldBeEntry)); + } + + // Otherwise, it's search time! 
+  SmallString<512> LookupPath;
+  // Non-angled lookup
+  if (!isAngled) {
+    bool TryLocalLookup = false;
+    // An empty search path names no directory, so only a non-empty one is
+    // used; otherwise fall back to the directory of the including file.
+    if (SearchPath && !SearchPath->empty()) {
+      llvm::sys::path::native(*SearchPath, LookupPath);
+      TryLocalLookup = true;
+    } else if (LookupFromFile) {
+      // Use file-based lookup here
+      StringRef FullFileDir = LookupFromFile->tryGetRealPathName();
+      if (!FullFileDir.empty()) {
+        llvm::sys::path::native(FullFileDir, LookupPath);
+        llvm::sys::path::remove_filename(LookupPath);
+        TryLocalLookup = true;
+      }
+    }
+    if (TryLocalLookup) {
+      if (!LookupPath.empty() &&
+          !llvm::sys::path::is_separator(LookupPath.back())) {
+        LookupPath.append(llvm::sys::path::get_separator());
+      }
+      LookupPath.append(Filename);
+      llvm::Expected ShouldBeEntry =
+          FM.getFileRef(LookupPath, true, OpenFile);
+      if (ShouldBeEntry) {
+        return std::move(*ShouldBeEntry);
+      } else {
+        llvm::consumeError(ShouldBeEntry.takeError());
+      }
+    }
+  }
+
+  if (!isAngled) {
+    // Working-directory lookup: resolve the name against ".". A separator
+    // is appended at most once, and only when the directory name does not
+    // already end in one.
+    LookupPath.clear();
+    auto MaybeWorkingDirEntry = FM.getDirectoryRef(".");
+    if (MaybeWorkingDirEntry) {
+      DirectoryEntryRef WorkingDirEntry = *MaybeWorkingDirEntry;
+      StringRef WorkingDir = WorkingDirEntry.getName();
+      if (!WorkingDir.empty()) {
+        llvm::sys::path::native(WorkingDir, LookupPath);
+        if (!LookupPath.empty() &&
+            !llvm::sys::path::is_separator(LookupPath.back())) {
+          LookupPath.append(llvm::sys::path::get_separator());
+        }
+        LookupPath.append(Filename);
+        llvm::Expected ShouldBeEntry =
+            FM.getFileRef(LookupPath, true, OpenFile);
+        if (ShouldBeEntry) {
+          return std::move(*ShouldBeEntry);
+        } else {
+          llvm::consumeError(ShouldBeEntry.takeError());
+        }
+      }
+    }
+  }
+
+  // Finally, try each directory from the embed-dir command-line options.
+  for (const auto &Entry : PPOpts->EmbedEntries) {
+    LookupPath.clear();
+    llvm::sys::path::native(Entry, LookupPath);
+    if (!LookupPath.empty() &&
+
!llvm::sys::path::is_separator(LookupPath.back())) { + LookupPath.append(llvm::sys::path::get_separator()); + } + LookupPath.append(Filename.begin(), Filename.end()); + llvm::sys::path::native(LookupPath); + llvm::Expected ShouldBeEntry = + FM.getFileRef(LookupPath, true, OpenFile); + if (ShouldBeEntry) { + return std::move(*ShouldBeEntry); + } else { + llvm::consumeError(ShouldBeEntry.takeError()); + } + } + return std::nullopt; +} + //===----------------------------------------------------------------------===// // Preprocessor Directive Handling. //===----------------------------------------------------------------------===// @@ -1174,6 +1271,7 @@ void Preprocessor::HandleDirective(Token &Result) { case tok::pp_include_next: case tok::pp___include_macros: case tok::pp_pragma: + case tok::pp_embed: Diag(Result, diag::err_embedded_directive) << II->getName(); Diag(*ArgMacro, diag::note_macro_expansion_here) << ArgMacro->getIdentifierInfo(); @@ -1288,6 +1386,11 @@ void Preprocessor::HandleDirective(Token &Result) { return HandleIdentSCCSDirective(Result); case tok::pp_sccs: return HandleIdentSCCSDirective(Result); + case tok::pp_embed: + return HandleEmbedDirective(SavedHash.getLocation(), Result, + getCurrentFileLexer() + ? getCurrentFileLexer()->getFileEntry() + : nullptr); case tok::pp_assert: //isExtension = true; // FIXME: implement #assert break; @@ -3517,3 +3620,400 @@ void Preprocessor::HandleElifFamilyDirective(Token &ElifToken, HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true, /*FoundElse*/ CI.FoundElse, ElifToken.getLocation()); } + +enum class BracketType { Brace, Paren, Square }; + +Preprocessor::LexEmbedParametersResult +Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed, + bool DiagnoseUnknown) { + LexEmbedParametersResult Result{}; + SmallString<32> Parameter; + SmallVector ParameterTokens; + tok::TokenKind EndTokenKind = InHasEmbed ? 
tok::r_paren : tok::eod;
+  Result.StartLoc = CurTok.getLocation();
+  for (LexNonComment(CurTok); CurTok.isNot(EndTokenKind);) {
+    // Start every parameter from a clean slate: the argument tokens of an
+    // unrecognized parameter must not leak into the next parameter.
+    Parameter.clear();
+    ParameterTokens.clear();
+    // Lex identifier [:: identifier ...]
+    if (!CurTok.is(tok::identifier)) {
+      Diag(CurTok, diag::err_expected) << "identifier";
+      DiscardUntilEndOfDirective();
+      return Result;
+    }
+    Token ParameterStartTok = CurTok;
+    IdentifierInfo *InitialID = CurTok.getIdentifierInfo();
+    Parameter.append(InitialID->getName());
+    for (LexNonComment(CurTok); CurTok.is(tok::coloncolon);
+         LexNonComment(CurTok)) {
+      Parameter.append("::");
+      LexNonComment(CurTok);
+      if (!CurTok.is(tok::identifier)) {
+        Diag(CurTok, diag::err_expected) << "identifier";
+        DiscardUntilEndOfDirective();
+        return Result;
+      }
+      IdentifierInfo *NextID = CurTok.getIdentifierInfo();
+      Parameter.append(NextID->getName());
+    }
+    // Lex the parameters (dependent on the parameter type we want!)
+    if (Parameter == "limit") {
+      // we have a limit parameter and its internals are processed using
+      // evaluation rules from #if - handle here
+      if (CurTok.isNot(tok::l_paren)) {
+        Diag(CurTok, diag::err_pp_expected_after) << "(" << Parameter;
+        DiscardUntilEndOfDirective();
+        return Result;
+      }
+      IdentifierInfo *ParameterIfNDef = nullptr;
+      DirectiveEvalResult LimitEvalResult =
+          EvaluateDirectiveExpression(ParameterIfNDef, CurTok, false, true);
+      if (!LimitEvalResult.Value)
+        return Result;
+      const llvm::APSInt &LimitResult = *LimitEvalResult.Value;
+      // Only a negative value or one wider than 64 bits fails to fit.
+      const bool ValueDoesNotFit =
+          LimitResult.getBitWidth() > 64 || LimitResult.isNegative();
+      if (ValueDoesNotFit) {
+        Diag(CurTok, diag::warn_pp_expr_overflow);
+        // just truncate and roll with that, I guess?
+ Result.MaybeLimitParam = + static_cast(LimitResult.getRawData()[0]); + } else { + Result.MaybeLimitParam = + static_cast(LimitResult.getZExtValue()); + } + LexNonComment(CurTok); + } else if (Parameter == "clang::offset") { + // we have a limit parameter and its internals are processed using + // evaluation rules from #if - handle here + if (CurTok.isNot(tok::l_paren)) { + Diag(CurTok, diag::err_pp_expected_after) << "(" << Parameter; + DiscardUntilEndOfDirective(); + return Result; + } + IdentifierInfo *ParameterIfNDef = nullptr; + DirectiveEvalResult OffsetEvalResult = + EvaluateDirectiveExpression(ParameterIfNDef, CurTok, false, true); + if (!OffsetEvalResult.Value) { + return Result; + } + const llvm::APSInt &OffsetResult = *OffsetEvalResult.Value; + if (OffsetResult.getBitWidth() > 64) { + Diag(CurTok, diag::warn_pp_expr_overflow); + // just truncate and roll with that, I guess? + Result.MaybeOffsetParam = + static_cast(OffsetResult.getRawData()[0]); + } else { + Result.MaybeOffsetParam = + static_cast(OffsetResult.getZExtValue()); + } + LexNonComment(CurTok); + } else { + if (CurTok.is(tok::l_paren)) { + SmallVector Brackets; + Brackets.push_back(BracketType::Paren); + auto ParseArgToken = [&]() { + for (LexNonComment(CurTok); CurTok.isNot(tok::eod); + LexNonComment(CurTok)) { + switch (CurTok.getKind()) { + default: + break; + case tok::l_paren: + Brackets.push_back(BracketType::Paren); + break; + case tok::r_paren: + if (Brackets.back() != BracketType::Paren) { + Diag(CurTok, diag::err_pp_expected_rparen); + return false; + } + Brackets.pop_back(); + if (Brackets.empty()) { + return true; + } + break; + case tok::l_brace: + Brackets.push_back(BracketType::Brace); + break; + case tok::r_brace: + if (Brackets.back() != BracketType::Brace) { + Diag(CurTok, diag::err_expected) << "}"; + return false; + } + Brackets.pop_back(); + break; + case tok::l_square: + Brackets.push_back(BracketType::Square); + break; + case tok::r_square: + if (Brackets.back() != 
BracketType::Square) { + Diag(CurTok, diag::err_expected) << "]"; + return false; + } + Brackets.pop_back(); + break; + } + ParameterTokens.push_back(CurTok); + } + if (!Brackets.empty()) { + Diag(CurTok, diag::err_pp_expected_rparen); + DiscardUntilEndOfDirective(); + return false; + } + return true; + }; + if (!ParseArgToken()) { + return Result; + } + if (!CurTok.is(tok::r_paren)) { + Diag(CurTok, diag::err_pp_expected_rparen); + DiscardUntilEndOfDirective(); + return Result; + } + Lex(CurTok); + } + // "Token-soup" parameters + if (Parameter == "if_empty") { + // TODO: integer list optimization + Result.MaybeIfEmptyParam = std::move(ParameterTokens); + } else if (Parameter == "prefix") { + // TODO: integer list optimization + Result.MaybePrefixParam = std::move(ParameterTokens); + } else if (Parameter == "suffix") { + // TODO: integer list optimization + Result.MaybeSuffixParam = std::move(ParameterTokens); + } else { + ++Result.UnrecognizedParams; + if (DiagnoseUnknown) { + Diag(ParameterStartTok, diag::warn_pp_unknown_parameter_ignored) + << 1 << Parameter; + } + } + } + } + Result.Successful = true; + return Result; +} + +// This array must survive for an extended period of time +inline constexpr const char *IntegerLiterals[] = { + "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", + "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", + "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", + "33", "34", "35", "36", "37", "38", "39", "40", "41", "42", "43", + "44", "45", "46", "47", "48", "49", "50", "51", "52", "53", "54", + "55", "56", "57", "58", "59", "60", "61", "62", "63", "64", "65", + "66", "67", "68", "69", "70", "71", "72", "73", "74", "75", "76", + "77", "78", "79", "80", "81", "82", "83", "84", "85", "86", "87", + "88", "89", "90", "91", "92", "93", "94", "95", "96", "97", "98", + "99", "100", "101", "102", "103", "104", "105", "106", "107", "108", "109", + "110", "111", "112", "113", "114", "115", "116", "117", 
"118", "119", "120", + "121", "122", "123", "124", "125", "126", "127", "128", "129", "130", "131", + "132", "133", "134", "135", "136", "137", "138", "139", "140", "141", "142", + "143", "144", "145", "146", "147", "148", "149", "150", "151", "152", "153", + "154", "155", "156", "157", "158", "159", "160", "161", "162", "163", "164", + "165", "166", "167", "168", "169", "170", "171", "172", "173", "174", "175", + "176", "177", "178", "179", "180", "181", "182", "183", "184", "185", "186", + "187", "188", "189", "190", "191", "192", "193", "194", "195", "196", "197", + "198", "199", "200", "201", "202", "203", "204", "205", "206", "207", "208", + "209", "210", "211", "212", "213", "214", "215", "216", "217", "218", "219", + "220", "221", "222", "223", "224", "225", "226", "227", "228", "229", "230", + "231", "232", "233", "234", "235", "236", "237", "238", "239", "240", "241", + "242", "243", "244", "245", "246", "247", "248", "249", "250", "251", "252", + "253", "254", "255"}; + +void Preprocessor::HandleEmbedDirectiveNaive(SourceLocation FilenameLoc, + LexEmbedParametersResult &Params, + StringRef BinaryContents, + const size_t TargetCharWidth) { + (void)TargetCharWidth; // for later, when we support various sizes + size_t TokenIndex = 0; + const size_t InitListTokensSize = [&]() { + if (BinaryContents.empty()) { + if (Params.MaybeIfEmptyParam) { + return Params.MaybeIfEmptyParam->size(); + } else { + return static_cast(0); + } + } else { + return static_cast( + (Params.MaybePrefixParam ? Params.MaybePrefixParam->size() : 0) + + (BinaryContents.size() * 2 - 1) + + (Params.MaybeSuffixParam ? 
Params.MaybeSuffixParam->size() : 0)); + } + }(); + std::unique_ptr InitListTokens(new Token[InitListTokensSize]()); + + if (BinaryContents.empty()) { + if (Params.MaybeIfEmptyParam) { + std::copy(Params.MaybeIfEmptyParam->begin(), + Params.MaybeIfEmptyParam->end(), InitListTokens.get()); + TokenIndex += Params.MaybeIfEmptyParam->size(); + assert(TokenIndex == InitListTokensSize); + EnterTokenStream(std::move(InitListTokens), InitListTokensSize, true, + true); + } + return; + } + + // FIXME: this does not take the target's byte size into account; + // will fail on many DSPs and embedded machines! + if (Params.MaybePrefixParam) { + std::copy(Params.MaybePrefixParam->begin(), Params.MaybePrefixParam->end(), + InitListTokens.get() + TokenIndex); + TokenIndex += Params.MaybePrefixParam->size(); + } + for (size_t I = 0; I < BinaryContents.size(); ++I) { + unsigned char ByteValue = BinaryContents[I]; + StringRef ByteRepresentation = IntegerLiterals[ByteValue]; + const size_t InitListIndex = TokenIndex; + Token &IntToken = InitListTokens[InitListIndex]; + IntToken.setKind(tok::numeric_constant); + IntToken.setLiteralData(ByteRepresentation.data()); + IntToken.setLength(ByteRepresentation.size()); + IntToken.setLocation(FilenameLoc); + ++TokenIndex; + bool AtEndOfContents = I == (BinaryContents.size() - 1); + if (!AtEndOfContents) { + const size_t CommaInitListIndex = InitListIndex + 1; + Token &CommaToken = InitListTokens[CommaInitListIndex]; + CommaToken.setKind(tok::comma); + CommaToken.setLocation(FilenameLoc); + ++TokenIndex; + } + } + if (Params.MaybeSuffixParam) { + std::copy(Params.MaybeSuffixParam->begin(), Params.MaybeSuffixParam->end(), + InitListTokens.get() + TokenIndex); + TokenIndex += Params.MaybeSuffixParam->size(); + } + assert(TokenIndex == InitListTokensSize); + EnterTokenStream(std::move(InitListTokens), InitListTokensSize, true, false); +} + +void Preprocessor::HandleEmbedDirectiveBuiltin(SourceLocation FilenameLoc, + LexEmbedParametersResult &Params, 
+                                               StringRef BinaryContents,
+                                               const size_t TargetCharWidth) {
+  // TODO: implement direct built-in support
+  HandleEmbedDirectiveNaive(FilenameLoc, Params, BinaryContents,
+                            TargetCharWidth);
+}
+
+void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
+                                        const FileEntry *LookupFromFile) {
+  // #embed is standard only in C23 and C++26; diagnose its use elsewhere.
+  // NOTE(review): the diagnostic IDs look swapped between C and C++; confirm.
+  if (LangOpts.CPlusPlus ? !LangOpts.CPlusPlus26 : !LangOpts.C23)
+    Diag(EmbedTok, LangOpts.CPlusPlus ? diag::warn_c23_pp_embed
+                                      : diag::warn_cxx26_pp_embed);
+
+  // Parse the filename header
+  Token FilenameTok;
+  if (LexHeaderName(FilenameTok))
+    return;
+  if (FilenameTok.isNot(tok::header_name)) {
+    Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
+    if (FilenameTok.isNot(tok::eod))
+      DiscardUntilEndOfDirective();
+    return;
+  }
+
+  // Parse the optional sequence of
+  // directive-parameters:
+  //     identifier parameter-name-list[opt] directive-argument-list[opt]
+  // directive-argument-list:
+  //    '(' balanced-token-sequence ')'
+  // parameter-name-list:
+  //    '::' identifier parameter-name-list[opt]
+  Token CurTok;
+  LexEmbedParametersResult Params = LexEmbedParameters(
+      CurTok, /*InHasEmbed=*/false, /*DiagnoseUnknown=*/true);
+  if (!Params.Successful) // Malformed parameters: do not expand the directive.
+    return;
+
+  // Now, splat the data out!
+  SmallString<128> FilenameBuffer;
+  SmallString<512> SearchPath;
+  SmallString<512> RelativePath;
+  StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
+  SourceLocation FilenameLoc = FilenameTok.getLocation();
+  StringRef OriginalFilename = Filename;
+  bool isAngled = GetIncludeFilenameSpelling(FilenameLoc, Filename);
+  // If GetIncludeFilenameSpelling set the start ptr to null, there was an
+  // error.
+ assert(!Filename.empty()); + OptionalFileEntryRef MaybeFileRef = + this->LookupEmbedFile(FilenameLoc, Filename, isAngled, false, + LookupFromFile, &SearchPath, &RelativePath); + if (!MaybeFileRef) { + // could not find file + if (Callbacks && Callbacks->EmbedFileNotFound(OriginalFilename)) { + return; + } + Diag(FilenameTok, diag::err_pp_file_not_found) + << Filename; + return; + } + std::optional MaybeSignedLimit{}; + if (Params.MaybeLimitParam) { + if (static_cast(INT64_MAX) >= *Params.MaybeLimitParam) { + MaybeSignedLimit = static_cast(*Params.MaybeLimitParam); + } + } + llvm::ErrorOr> MaybeFile = getFileManager().getBufferForFile( + *MaybeFileRef, false, false, MaybeSignedLimit); + if (!MaybeFile) { + // could not find file + Diag(FilenameTok, diag::err_cannot_open_file) + << Filename << "a buffer to the contents could not be created"; + return; + } + StringRef BinaryContents = MaybeFile.get()->getBuffer(); + if (Params.MaybeOffsetParam) { + // offsets all the way to the end of the file make for an empty file. 
+ const size_t OffsetParam = *Params.MaybeOffsetParam; + BinaryContents = BinaryContents.substr(OffsetParam); + } + const size_t TargetCharWidth = getTargetInfo().getCharWidth(); + if (TargetCharWidth > 64) { + // Too wide for us to handle + Diag(EmbedTok, diag::err_pp_unsupported_directive) + << 1 + << "CHAR_BIT is too wide for the target architecture to handle " + "properly"; + return; + } + if (TargetCharWidth != 8) { + Diag(EmbedTok, diag::err_pp_unsupported_directive) + << 1 + << "At the moment, we do not have the machinery to support non 8-bit " + "CHAR_BIT targets!"; + return; + } + if (CHAR_BIT % TargetCharWidth != 0) { + Diag(EmbedTok, diag::err_pp_unsupported_directive) + << 1 + << "CHAR_BIT is not evenly divisible by host architecture's byte " + "definition"; + return; + } + if (Callbacks) { + CharSourceRange FilenameSourceRange( + SourceRange(FilenameTok.getLocation(), FilenameTok.getEndLoc()), true); + CharSourceRange ParametersRange(SourceRange(Params.StartLoc, Params.EndLoc), + true); + Callbacks->EmbedDirective(HashLoc, Filename, isAngled, FilenameSourceRange, + ParametersRange, MaybeFileRef, SearchPath, + RelativePath); + } + if (PPOpts->NoBuiltinPPEmbed) { + HandleEmbedDirectiveNaive(FilenameLoc, Params, BinaryContents, + TargetCharWidth); + } else { + // emit a token directly, handle it internally. + HandleEmbedDirectiveBuiltin(FilenameLoc, Params, BinaryContents, + TargetCharWidth); + } +} diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp index 269984aae07bf..dda5717afc699 100644 --- a/clang/lib/Lex/PPExpressions.cpp +++ b/clang/lib/Lex/PPExpressions.cpp @@ -868,7 +868,9 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec, /// may occur after a #if or #elif directive. If the expression is equivalent /// to "!defined(X)" return X in IfNDefMacro. 
Preprocessor::DirectiveEvalResult -Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { +Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro, + Token &Tok, bool CheckForEoD, + bool Parenthesized) { SaveAndRestore PPDir(ParsingIfOrElifDirective, true); // Save the current state of 'DisableMacroExpansion' and reset it to false. If // 'DisableMacroExpansion' is true, then we must be in a macro argument list @@ -880,7 +882,6 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { DisableMacroExpansion = false; // Peek ahead one token. - Token Tok; LexNonComment(Tok); // C99 6.10.1p3 - All expressions are evaluated as intmax_t or uintmax_t. @@ -901,7 +902,8 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { // We cannot trust the source range from the value because there was a // parse error. Track the range manually -- the end of the directive is the // end of the condition range. - return {false, + return {std::nullopt, + false, DT.IncludedUndefinedIds, {ExprStartLoc, ConditionRange.getEnd()}}; } @@ -917,7 +919,10 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { // Restore 'DisableMacroExpansion'. DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective; - return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()}; + const bool IsNonZero = ResVal.Val != 0; + const SourceRange ValRange = ResVal.getRange(); + return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds, + ValRange}; } // Otherwise, we must have a binary operator (e.g. "#if 1 < 2"), so parse the @@ -930,17 +935,34 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { // Restore 'DisableMacroExpansion'. 
DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
-    return {false, DT.IncludedUndefinedIds, ResVal.getRange()};
+    // Parse failure: report "no value" and a false condition, like the other
+    // error return of this function, so callers can detect the failure.
+    return {std::nullopt, false, DT.IncludedUndefinedIds,
+            ResVal.getRange()};
   }
-  // If we aren't at the tok::eod token, something bad happened, like an extra
-  // ')' token.
-  if (Tok.isNot(tok::eod)) {
-    Diag(Tok, diag::err_pp_expected_eol);
-    DiscardUntilEndOfDirective();
+  if (CheckForEoD) {
+    // If we aren't at the tok::eod token, something bad happened, like an extra
+    // ')' token.
+    if (Tok.isNot(tok::eod)) {
+      Diag(Tok, diag::err_pp_expected_eol);
+      DiscardUntilEndOfDirective();
+    }
   }
   // Restore 'DisableMacroExpansion'.
   DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective;
-  return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()};
+  const bool IsNonZero = ResVal.Val != 0;
+  const SourceRange ValRange = ResVal.getRange();
+  return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds, ValRange};
+}
+
+/// EvaluateDirectiveExpression - Evaluate an integer constant expression that
+/// may occur after a #if or #elif directive. If the expression is equivalent
+/// to "!defined(X)" return X in IfNDefMacro.
+Preprocessor::DirectiveEvalResult Preprocessor::EvaluateDirectiveExpression(
+    IdentifierInfo *&IfNDefMacro, bool CheckForEoD, bool Parenthesized) {
+  Token Tok;
+  return EvaluateDirectiveExpression(IfNDefMacro, Tok, CheckForEoD,
+                                     Parenthesized);
 }
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index b371f8cf7a9c0..6e0163ccc89b7 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -380,6 +380,7 @@ void Preprocessor::RegisterBuiltinMacros() {
     Ident__has_c_attribute = nullptr;
   Ident__has_declspec = RegisterBuiltinMacro(*this, "__has_declspec_attribute");
+  Ident__has_embed = RegisterBuiltinMacro(*this, "__has_embed");
   Ident__has_include = RegisterBuiltinMacro(*this, "__has_include");
   Ident__has_include_next = RegisterBuiltinMacro(*this, "__has_include_next");
   Ident__has_warning = RegisterBuiltinMacro(*this, "__has_warning");
@@ -1264,6 +1265,114 @@ static bool EvaluateHasIncludeCommon(Token &Tok, IdentifierInfo *II,
   return File.has_value();
 }
+/// EvaluateHasEmbed - Process a '__has_embed("foo" params...)' expression.
+/// Returns one of the VALUE__STDC_EMBED_* results (not-found on any error).
+int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
+  // __has_embed is standard only in C23 and C++26; diagnose its use in any
+  // other language mode.
+  // NOTE(review): the diagnostic IDs look swapped between C and C++; confirm.
+  if (LangOpts.CPlusPlus ? !LangOpts.CPlusPlus26 : !LangOpts.C23)
+    Diag(Tok, LangOpts.CPlusPlus ? diag::warn_c23_pp_has_embed
+                                 : diag::warn_cxx26_pp_has_embed);
+
+  // Save the location of the current token. If a '(' is later found, use
+  // that location. If not, use the end of this location instead.
+  SourceLocation LParenLoc = Tok.getLocation();
+
+  // These expressions are only allowed within a preprocessor directive.
+  if (!this->isParsingIfOrElifDirective()) {
+    Diag(LParenLoc, diag::err_pp_directive_required) << II;
+    // Return a valid identifier token.
+ assert(Tok.is(tok::identifier)); + Tok.setIdentifierInfo(II); + return VALUE__STDC_EMBED_NOT_FOUND__; + } + + // Get '('. If we don't have a '(', try to form a header-name token. + do { + if (this->LexHeaderName(Tok)) { + return VALUE__STDC_EMBED_NOT_FOUND__; + } + } while (Tok.getKind() == tok::comment); + + // Ensure we have a '('. + if (Tok.isNot(tok::l_paren)) { + // No '(', use end of last token. + LParenLoc = this->getLocForEndOfToken(LParenLoc); + this->Diag(LParenLoc, diag::err_pp_expected_after) << II << tok::l_paren; + // If the next token looks like a filename or the start of one, + // assume it is and process it as such. + if (Tok.isNot(tok::header_name)) { + return VALUE__STDC_EMBED_NOT_FOUND__; + } + } else { + // Save '(' location for possible missing ')' message. + LParenLoc = Tok.getLocation(); + if (this->LexHeaderName(Tok)) { + return VALUE__STDC_EMBED_NOT_FOUND__; + } + } + + if (Tok.isNot(tok::header_name)) { + Diag(Tok.getLocation(), diag::err_pp_expects_filename); + return VALUE__STDC_EMBED_NOT_FOUND__; + } + + SourceLocation FilenameLoc = Tok.getLocation(); + Token FilenameTok = Tok; + + Preprocessor::LexEmbedParametersResult Params = this->LexEmbedParameters(Tok, true, false); + if (!Params.Successful) { + if (Tok.isNot(tok::eod)) + this->DiscardUntilEndOfDirective(); + return VALUE__STDC_EMBED_NOT_FOUND__; + } + if (Params.UnrecognizedParams > 0) { + return VALUE__STDC_EMBED_NOT_FOUND__; + } + + if (!Tok.is(tok::r_paren)) { + Diag(this->getLocForEndOfToken(FilenameLoc), diag::err_pp_expected_after) + << II << tok::r_paren; + Diag(LParenLoc, diag::note_matching) << tok::l_paren; + DiscardUntilEndOfDirective(); + return VALUE__STDC_EMBED_NOT_FOUND__; + } + + + SmallString<128> FilenameBuffer; + SmallString<256> RelativePath; + StringRef Filename = this->getSpelling(FilenameTok, FilenameBuffer); + StringRef OriginalFilename = Filename; + bool isAngled = + this->GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename); + // If 
GetIncludeFilenameSpelling set the start ptr to null, there was an + // error. + assert(!Filename.empty()); + const FileEntry *LookupFromFile = + this->getCurrentFileLexer() ? this->getCurrentFileLexer()->getFileEntry() + : nullptr; + OptionalFileEntryRef MaybeFileEntry = + this->LookupEmbedFile(FilenameLoc, Filename, isAngled, false, + LookupFromFile, nullptr, + &RelativePath); + if (Callbacks) { + Callbacks->HasEmbed(LParenLoc, Filename, isAngled, MaybeFileEntry); + } + if (!MaybeFileEntry) { + return VALUE__STDC_EMBED_NOT_FOUND__; + } + size_t FileSize = MaybeFileEntry->getSize(); + if (FileSize == 0 || + (Params.MaybeLimitParam ? *Params.MaybeLimitParam == 0 : false)) { + return VALUE__STDC_EMBED_EMPTY__; + } + if (Params.MaybeOffsetParam && *Params.MaybeOffsetParam >= FileSize) { + return VALUE__STDC_EMBED_EMPTY__; + } + return VALUE__STDC_EMBED_FOUND__; +} + bool Preprocessor::EvaluateHasInclude(Token &Tok, IdentifierInfo *II) { return EvaluateHasIncludeCommon(Tok, II, *this, nullptr, nullptr); } @@ -1801,6 +1910,17 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { return; OS << (int)Value; Tok.setKind(tok::numeric_constant); + } else if (II == Ident__has_embed) { + // The argument to these two builtins should be a parenthesized + // file name string literal using angle brackets (<>) or + // double-quotes (""), optionally followed by a series of + // arguments similar to form like attributes. + int Value = EvaluateHasEmbed(Tok, II); + + if (Tok.isNot(tok::r_paren)) + return; + OS << Value; + Tok.setKind(tok::numeric_constant); } else if (II == Ident__has_warning) { // The argument should be a parenthesized string literal. 
EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false, diff --git a/clang/test/Preprocessor/Inputs/jk.txt b/clang/test/Preprocessor/Inputs/jk.txt new file mode 100644 index 0000000000000..93d177a48c83a --- /dev/null +++ b/clang/test/Preprocessor/Inputs/jk.txt @@ -0,0 +1 @@ +jk \ No newline at end of file diff --git a/clang/test/Preprocessor/Inputs/media/art.txt b/clang/test/Preprocessor/Inputs/media/art.txt new file mode 100644 index 0000000000000..1ce9ab967e4a1 --- /dev/null +++ b/clang/test/Preprocessor/Inputs/media/art.txt @@ -0,0 +1,9 @@ + __ _ + .-.' `; `-._ __ _ + (_, .-:' `; `-._ + ,'o"( (_, ) + (__,-' ,'o"( )> + ( (__,-' ) + `-'._.--._( ) + ||| |||`-'._.--._.-' + ||| ||| diff --git a/clang/test/Preprocessor/Inputs/media/empty b/clang/test/Preprocessor/Inputs/media/empty new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Preprocessor/Inputs/single_byte.txt b/clang/test/Preprocessor/Inputs/single_byte.txt new file mode 100644 index 0000000000000..63d8dbd40c235 --- /dev/null +++ b/clang/test/Preprocessor/Inputs/single_byte.txt @@ -0,0 +1 @@ +b \ No newline at end of file diff --git a/clang/test/Preprocessor/embed___has_embed.c b/clang/test/Preprocessor/embed___has_embed.c new file mode 100644 index 0000000000000..80980e753614a --- /dev/null +++ b/clang/test/Preprocessor/embed___has_embed.c @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 %s -E -embed-dir=%S/Inputs -CC -verify + +#if !__has_embed(__FILE__) +#error 1 +#elif !__has_embed("media/art.txt") +#error 2 +#elif __has_embed("asdkasdjkadsjkdsfjk") +#error 3 +#elif __has_embed("asdkasdjkadsjkdsfjk" limit(1)) +#error 4 +#elif __has_embed("asdkasdjkadsjkdsfjk" suffix(x) limit(1)) +#error 5 +#elif __has_embed("asdkasdjkadsjkdsfjk" suffix(x) djsakdasjd::xmeow("xD")) +#error 6 +#elif !__has_embed(__FILE__ limit(2) prefix(y)) +#error 7 +#elif !__has_embed(__FILE__ limit(2)) +#error 8 +#elif __has_embed(__FILE__ dajwdwdjdahwk::meow(x)) +#error 9 +#elif __has_embed() != 2 +#error 10 +#elif 
__has_embed( limit(0)) != 2 +#error 11 +#elif __has_embed( limit(0)) != 2 +#error 12 +#elif __has_embed( limit(1) clang::offset(1)) != 2 +#error 13 +#elif !__has_embed() +#error 14 +#elif !__has_embed( if_empty(meow)) +#error 14 +#endif +// expected-no-diagnostics diff --git a/clang/test/Preprocessor/embed___has_embed_supported.c b/clang/test/Preprocessor/embed___has_embed_supported.c new file mode 100644 index 0000000000000..fe0edb00e6098 --- /dev/null +++ b/clang/test/Preprocessor/embed___has_embed_supported.c @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 %s -E -CC -verify + +#if !__has_embed(__FILE__) +#error 1 +#elif !__has_embed(__FILE__) +#error 2 +#elif !__has_embed(__FILE__ suffix(x)) +#error 3 +#elif !__has_embed(__FILE__ suffix(x) limit(1)) +#error 4 +#elif !__has_embed(__FILE__ suffix(x) limit(1) prefix(1)) +#error 5 +#elif !__has_embed(__FILE__ suffix(x) limit(2) prefix(1) clang::offset(1)) +#error 6 +#elif !__has_embed(__FILE__ suffix(x) limit(0) prefix(1)) +#error 7 +#elif __has_embed(__FILE__ suffix(x) limit(1) prefix(1) clang::offset(1)) != 2 +#error 8 +#elif __has_embed(__FILE__ suffix(x) limit(0)) != 2 +#error 9 +#elif __has_embed(__FILE__ suffix(x) limit(0) if_empty(:3)) != 2 +#error 10 +#endif +// expected-no-diagnostics diff --git a/clang/test/Preprocessor/embed_feature_test.cpp b/clang/test/Preprocessor/embed_feature_test.cpp new file mode 100644 index 0000000000000..46787041ca23b --- /dev/null +++ b/clang/test/Preprocessor/embed_feature_test.cpp @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 %s -E -CC -verify +// RUN: %clang_cc1 -x c %s -E -CC -verify + +#if defined(__cplusplus) +#if !defined(__cpp_pp_embed) || __cpp_pp_embed != 202403L +#error 1 +#endif +#endif + +#if !defined(__has_embed) +#error 2 +#endif +// expected-no-diagnostics diff --git a/clang/test/Preprocessor/embed_file_not_found.c b/clang/test/Preprocessor/embed_file_not_found.c new file mode 100644 index 0000000000000..337fa4ac067ec --- /dev/null +++ 
b/clang/test/Preprocessor/embed_file_not_found.c @@ -0,0 +1,4 @@ +// RUN: %clang_cc1 %s -E -CC -verify + +#embed +// expected-error@-1 {{'nfejfNejAKFe' file not found}} diff --git a/clang/test/Preprocessor/embed_init.c b/clang/test/Preprocessor/embed_init.c new file mode 100644 index 0000000000000..cd517b7f216ac --- /dev/null +++ b/clang/test/Preprocessor/embed_init.c @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -CC -verify + +typedef struct kitty { + int purr; +} kitty; + +typedef struct kitty_kitty { + int here; + kitty kit; +} kitty_kitty; + +const int meow = +#embed +; + +const kitty kit = { +#embed +}; + +const kitty_kitty kit_kit = { +#embed +}; + +_Static_assert(meow == 'b', ""); +_Static_assert(kit.purr == 'b', ""); +_Static_assert(kit_kit.here == 'j', ""); +_Static_assert(kit_kit.kit.purr == 'k', ""); +// expected-no-diagnostics diff --git a/clang/test/Preprocessor/embed_parameter_if_empty.c b/clang/test/Preprocessor/embed_parameter_if_empty.c new file mode 100644 index 0000000000000..ac1a768b27fff --- /dev/null +++ b/clang/test/Preprocessor/embed_parameter_if_empty.c @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify + +const char data[] = { +#embed if_empty(123, 124, 125) +}; +const char non_empty_data[] = { +#embed if_empty(123, 124, 125) +}; +_Static_assert(sizeof(data) == 3, ""); +_Static_assert(123 == data[0], ""); +_Static_assert(124 == data[1], ""); +_Static_assert(125 == data[2], ""); +_Static_assert(sizeof(non_empty_data) == 2, ""); +_Static_assert('j' == non_empty_data[0], ""); +_Static_assert('k' == non_empty_data[1], ""); +// expected-no-diagnostics diff --git a/clang/test/Preprocessor/embed_parameter_limit.c b/clang/test/Preprocessor/embed_parameter_limit.c new file mode 100644 index 0000000000000..28a94fe9430f0 --- /dev/null +++ b/clang/test/Preprocessor/embed_parameter_limit.c @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify + +const char data[] 
= { +#embed +}; +const char offset_data[] = { +#embed limit(1) +}; +_Static_assert(sizeof(data) == 2, ""); +_Static_assert('j' == data[0], ""); +_Static_assert('k' == data[1], ""); +_Static_assert(sizeof(offset_data) == 1, ""); +_Static_assert('j' == offset_data[0], ""); +_Static_assert(offset_data[0] == data[0], ""); +// expected-no-diagnostics diff --git a/clang/test/Preprocessor/embed_parameter_offset.c b/clang/test/Preprocessor/embed_parameter_offset.c new file mode 100644 index 0000000000000..71a029544dca5 --- /dev/null +++ b/clang/test/Preprocessor/embed_parameter_offset.c @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify + +const char data[] = { +#embed +}; +const char offset_data[] = { +#embed clang::offset(1) +}; +_Static_assert(sizeof(data) == 2, ""); +_Static_assert('j' == data[0], ""); +_Static_assert('k' == data[1], ""); +_Static_assert(sizeof(offset_data) == 1, ""); +_Static_assert('k' == offset_data[0], ""); +_Static_assert(offset_data[0] == data[1], ""); +// expected-no-diagnostics diff --git a/clang/test/Preprocessor/embed_parameter_prefix.c b/clang/test/Preprocessor/embed_parameter_prefix.c new file mode 100644 index 0000000000000..5182a2b874d39 --- /dev/null +++ b/clang/test/Preprocessor/embed_parameter_prefix.c @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify + +const char data[] = { +#embed prefix('\xA', ) +}; +const char empty_data[] = { +#embed prefix('\xA', ) +1 +}; +_Static_assert(sizeof(data) == 2, ""); +_Static_assert('\xA' == data[0], ""); +_Static_assert('b' == data[1], ""); +_Static_assert(sizeof(empty_data) == 1, ""); +_Static_assert(1 == empty_data[0], ""); +// expected-no-diagnostics diff --git a/clang/test/Preprocessor/embed_parameter_suffix.c b/clang/test/Preprocessor/embed_parameter_suffix.c new file mode 100644 index 0000000000000..11c3f2bbbfb2b --- /dev/null +++ b/clang/test/Preprocessor/embed_parameter_suffix.c @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 %s 
-embed-dir=%S/Inputs -fsyntax-only -verify + +const char data[] = { +#embed suffix(, '\xA') +}; +const char empty_data[] = { +#embed suffix(, '\xA') +1 +}; +_Static_assert(sizeof(data) == 2, ""); +_Static_assert('b' == data[0], ""); +_Static_assert('\xA' == data[1], ""); +_Static_assert(sizeof(empty_data) == 1, ""); +_Static_assert(1 == empty_data[0], ""); +// expected-no-diagnostics diff --git a/clang/test/Preprocessor/embed_parameter_unrecognized.c b/clang/test/Preprocessor/embed_parameter_unrecognized.c new file mode 100644 index 0000000000000..1f043ccd2ff54 --- /dev/null +++ b/clang/test/Preprocessor/embed_parameter_unrecognized.c @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 %s -E -CC -verify + +#embed __FILE__ unrecognized +// expected-warning@-1 {{unknown embed preprocessor parameter 'unrecognized' ignored}} +#embed __FILE__ unrecognized::param +// expected-warning@-1 {{unknown embed preprocessor parameter 'unrecognized::param' ignored}} +#embed __FILE__ unrecognized::param(with, args) +// expected-warning@-1 {{unknown embed preprocessor parameter 'unrecognized::param' ignored}} diff --git a/clang/test/Preprocessor/embed_path_chevron.c b/clang/test/Preprocessor/embed_path_chevron.c new file mode 100644 index 0000000000000..5c33871c0c8a4 --- /dev/null +++ b/clang/test/Preprocessor/embed_path_chevron.c @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -CC -verify + +const char data[] = { +#embed +}; +_Static_assert(sizeof(data) == 1, ""); +_Static_assert('b' == data[0], ""); +// expected-no-diagnostics diff --git a/clang/test/Preprocessor/embed_path_quote.c b/clang/test/Preprocessor/embed_path_quote.c new file mode 100644 index 0000000000000..791cd9176ebe0 --- /dev/null +++ b/clang/test/Preprocessor/embed_path_quote.c @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -CC -verify + +const char data[] = { +#embed "single_byte.txt" +}; +_Static_assert(sizeof(data) == 1, ""); +_Static_assert('a' == data[0], ""); +// 
expected-no-diagnostics
diff --git a/clang/test/Preprocessor/single_byte.txt b/clang/test/Preprocessor/single_byte.txt
new file mode 100644
index 0000000000000..2e65efe2a145d
--- /dev/null
+++ b/clang/test/Preprocessor/single_byte.txt
@@ -0,0 +1 @@
+a
\ No newline at end of file
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 103c08ffbe83b..8f9d7c77ccd15 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -777,6 +777,13 @@ if(NOT DEFINED LLVM_DYLIB_COMPONENTS)
     "Semicolon-separated list of components to include in libLLVM, or \"all\".")
 endif()
 
+# LLVM_ENABLE_MSSTL_SECURE_WARNINGS: when turned OFF, define the Microsoft CRT
+# opt-out macros so MSVC-style secure CRT / non-standard-name warnings are quiet.
+option(LLVM_ENABLE_MSSTL_SECURE_WARNINGS "Turn on security warnings for the use of specific functions in Microsoft's STL." ON)
+if(NOT LLVM_ENABLE_MSSTL_SECURE_WARNINGS)
+  add_compile_definitions(_CRT_SECURE_NO_WARNINGS=1 _CRT_NONSTDC_NO_WARNINGS=1)
+endif()
+
 if(MSVC)
   option(LLVM_BUILD_LLVM_C_DYLIB "Build LLVM-C.dll (Windows only)" ON)
   # Set this variable to OFF here so it can't be set with a command-line
diff --git a/llvm/cmake/modules/GetHostTriple.cmake b/llvm/cmake/modules/GetHostTriple.cmake
index 1be13bc01ab9b..828227f2f25a2 100644
--- a/llvm/cmake/modules/GetHostTriple.cmake
+++ b/llvm/cmake/modules/GetHostTriple.cmake
@@ -2,7 +2,7 @@
 # Invokes config.guess
 
 function( get_host_triple var )
-  if( MSVC )
+  if( MSVC OR (CMAKE_HOST_SYSTEM_NAME STREQUAL Windows AND CMAKE_CXX_COMPILER_ID MATCHES "Clang") )
     if( CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "ARM64.*" )
       set( value "aarch64-pc-windows-msvc" )
     elseif( CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "ARM.*" )
@@ -41,7 +41,7 @@ function( get_host_triple var )
     else()
       set( value "powerpc-ibm-aix" )
     endif()
-  else( MSVC )
+  else()
     if(CMAKE_HOST_SYSTEM_NAME STREQUAL Windows AND NOT MSYS)
       message(WARNING "unable to determine host target triple")
     else()
@@ -55,6 +55,6 @@ function( get_host_triple var )
       endif( NOT TT_RV EQUAL 0 )
       set( value ${TT_OUT} )
     endif()
-  endif( MSVC )
+  endif()
   set( ${var} ${value} PARENT_SCOPE )
endfunction( get_host_triple var )