Skip to content

Commit 739b410

Browse files
committed
Add a warning, flags and pragmas to limit the number of pre-processor tokens in a translation unit
See https://docs.google.com/document/d/1xMkTZMKx9llnMPgso0jrx3ankI4cv60xeZ0y4ksf4wc/preview for background discussion. This adds a warning, flags and pragmas to limit the number of pre-processor tokens either at a certain point in a translation unit, or overall. The idea is that this would allow projects to limit the size of certain widely included headers, or for translation units overall, as a way to insert backstops for header bloat and prevent compile-time regressions. Differential revision: https://reviews.llvm.org/D72703
1 parent 9965b12 commit 739b410

File tree

13 files changed

+200
-2
lines changed

13 files changed

+200
-2
lines changed

clang/include/clang/Basic/DiagnosticGroups.td

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1149,3 +1149,32 @@ def CrossTU : DiagGroup<"ctu">;
11491149
def CTADMaybeUnsupported : DiagGroup<"ctad-maybe-unsupported">;
11501150

11511151
def FortifySource : DiagGroup<"fortify-source">;
1152+
1153+
def MaxTokens : DiagGroup<"max-tokens"> {
1154+
code Documentation = [{
1155+
The warning is issued if the number of pre-processor tokens exceeds
1156+
the token limit, which can be set in three ways:
1157+
1158+
1. As a limit at a specific point in a file, using the ``clang max_tokens_here``
1159+
pragma:
1160+
1161+
.. code-block:: c++
1162+
#pragma clang max_tokens_here 1234
1163+
1164+
2. As a per-translation unit limit, using the ``-fmax-tokens`` command-line
1165+
flag:
1166+
1167+
.. code-block:: console
1168+
clang -c a.cpp -fmax-tokens 1234
1169+
1170+
3. As a per-translation unit limit using the ``clang max_tokens_total`` pragma,
1171+
which works like and overrides the ``-fmax-tokens`` flag:
1172+
1173+
.. code-block:: c++
1174+
#pragma clang max_tokens_total 1234
1175+
1176+
These limits can be helpful in limiting code growth through included files.
1177+
1178+
Setting a token limit of zero means no limit.
1179+
}];
1180+
}

clang/include/clang/Basic/DiagnosticParseKinds.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,6 +1046,8 @@ def warn_pragma_expected_section_label_or_name : Warning<
10461046
def warn_pragma_expected_init_seg : Warning<
10471047
"expected 'compiler', 'lib', 'user', or a string literal for the section name in '#pragma %0' - ignored">,
10481048
InGroup<IgnoredPragmas>;
1049+
1050+
def err_pragma_expected_integer : Error<"expected an integer argument in '#pragma %0'">;
10491051
def warn_pragma_expected_integer : Warning<
10501052
"expected integer between %0 and %1 inclusive in '#pragma %2' - ignored">,
10511053
InGroup<IgnoredPragmas>;
@@ -1375,4 +1377,14 @@ def err_placeholder_expected_auto_or_decltype_auto : Error<
13751377
"expected 'auto' or 'decltype(auto)' after concept name">;
13761378
}
13771379

1380+
def warn_max_tokens : Warning<
1381+
"the number of preprocessor source tokens (%0) exceeds this token limit (%1)">,
1382+
InGroup<MaxTokens>;
1383+
1384+
def warn_max_tokens_total : Warning<
1385+
"the total number of preprocessor source tokens (%0) exceeds the token limit (%1)">,
1386+
InGroup<MaxTokens>;
1387+
1388+
def note_max_tokens_total_override : Note<"total token limit set here">;
1389+
13781390
} // end of Parser diagnostics

clang/include/clang/Basic/LangOptions.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,8 @@ LANGOPT(PaddingOnUnsignedFixedPoint, 1, 0,
344344

345345
LANGOPT(RegisterStaticDestructors, 1, 1, "Register C++ static destructors")
346346

347+
COMPATIBLE_VALUE_LANGOPT(MaxTokens, 32, 0, "Max number of tokens per TU or 0")
348+
347349
#undef LANGOPT
348350
#undef COMPATIBLE_LANGOPT
349351
#undef BENIGN_LANGOPT

clang/include/clang/Driver/Options.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -646,6 +646,9 @@ def emit_merged_ifs : Flag<["-"], "emit-merged-ifs">,
646646
def interface_stub_version_EQ : JoinedOrSeparate<["-"], "interface-stub-version=">, Flags<[CC1Option]>;
647647
def exported__symbols__list : Separate<["-"], "exported_symbols_list">;
648648
def e : JoinedOrSeparate<["-"], "e">, Group<Link_Group>;
649+
def fmax_tokens : Separate<["-"], "fmax-tokens">,
650+
HelpText<"Max total number of preprocessed tokens for -Wmax-tokens.">,
651+
Group<f_Group>, Flags<[CC1Option]>;
649652
def fPIC : Flag<["-"], "fPIC">, Group<f_Group>;
650653
def fno_PIC : Flag<["-"], "fno-PIC">, Group<f_Group>;
651654
def fPIE : Flag<["-"], "fPIE">, Group<f_Group>;

clang/include/clang/Lex/Preprocessor.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,14 @@ class Preprocessor {
416416
/// of phase 4 of translation or for some other situation.
417417
unsigned LexLevel = 0;
418418

419+
/// The number of (LexLevel 0) preprocessor tokens.
420+
unsigned TokenCount = 0;
421+
422+
/// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens
423+
/// warning, or zero for unlimited.
424+
unsigned MaxTokens = 0;
425+
SourceLocation MaxTokensOverrideLoc;
426+
419427
public:
420428
struct PreambleSkipInfo {
421429
SourceLocation HashTokenLoc;
@@ -1010,6 +1018,19 @@ class Preprocessor {
10101018
}
10111019
/// \}
10121020

1021+
/// Get the number of tokens processed so far.
1022+
unsigned getTokenCount() const { return TokenCount; }
1023+
1024+
/// Get the max number of tokens before issuing a -Wmax-tokens warning.
1025+
unsigned getMaxTokens() const { return MaxTokens; }
1026+
1027+
void overrideMaxTokens(unsigned Value, SourceLocation Loc) {
1028+
MaxTokens = Value;
1029+
MaxTokensOverrideLoc = Loc;
1030+
};
1031+
1032+
SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; }
1033+
10131034
/// Register a function that would be called on each token in the final
10141035
/// expanded token stream.
10151036
/// This also reports annotation tokens produced by the parser.

clang/include/clang/Parse/Parser.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,8 @@ class Parser : public CodeCompletionHandler {
201201
std::unique_ptr<PragmaHandler> STDCCXLIMITHandler;
202202
std::unique_ptr<PragmaHandler> STDCUnknownHandler;
203203
std::unique_ptr<PragmaHandler> AttributePragmaHandler;
204+
std::unique_ptr<PragmaHandler> MaxTokensHerePragmaHandler;
205+
std::unique_ptr<PragmaHandler> MaxTokensTotalPragmaHandler;
204206

205207
std::unique_ptr<CommentHandler> CommentSemaHandler;
206208

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5717,6 +5717,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
57175717
Args.AddLastArg(CmdArgs, options::OPT_dM);
57185718
Args.AddLastArg(CmdArgs, options::OPT_dD);
57195719

5720+
Args.AddLastArg(CmdArgs, options::OPT_fmax_tokens);
5721+
57205722
// Handle serialized diagnostics.
57215723
if (Arg *A = Args.getLastArg(options::OPT__serialize_diags)) {
57225724
CmdArgs.push_back("-serialize-diagnostic-file");

clang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3292,6 +3292,8 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
32923292

32933293
Opts.CompleteMemberPointers = Args.hasArg(OPT_fcomplete_member_pointers);
32943294
Opts.BuildingPCHWithObjectFile = Args.hasArg(OPT_building_pch_with_obj);
3295+
3296+
Opts.MaxTokens = getLastArgIntValue(Args, OPT_fmax_tokens, 0, Diags);
32953297
}
32963298

32973299
static bool isStrictlyPreprocessorAction(frontend::ActionKind Action) {

clang/lib/Lex/Preprocessor.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,8 @@ Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
166166
this->PPOpts->ExcludedConditionalDirectiveSkipMappings;
167167
if (ExcludedConditionalDirectiveSkipMappings)
168168
ExcludedConditionalDirectiveSkipMappings->clear();
169+
170+
MaxTokens = LangOpts.MaxTokens;
169171
}
170172

171173
Preprocessor::~Preprocessor() {
@@ -962,8 +964,12 @@ void Preprocessor::Lex(Token &Result) {
962964

963965
LastTokenWasAt = Result.is(tok::at);
964966
--LexLevel;
965-
if (OnToken && LexLevel == 0 && !Result.getFlag(Token::IsReinjected))
966-
OnToken(Result);
967+
968+
if (LexLevel == 0 && !Result.getFlag(Token::IsReinjected)) {
969+
++TokenCount;
970+
if (OnToken)
971+
OnToken(Result);
972+
}
967973
}
968974

969975
/// Lex a header-name token (including one formed from header-name-tokens if

clang/lib/Parse/ParsePragma.cpp

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,18 @@ struct PragmaAttributeHandler : public PragmaHandler {
262262
ParsedAttributes AttributesForPragmaAttribute;
263263
};
264264

265+
struct PragmaMaxTokensHereHandler : public PragmaHandler {
266+
PragmaMaxTokensHereHandler() : PragmaHandler("max_tokens_here") {}
267+
void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
268+
Token &FirstToken) override;
269+
};
270+
271+
struct PragmaMaxTokensTotalHandler : public PragmaHandler {
272+
PragmaMaxTokensTotalHandler() : PragmaHandler("max_tokens_total") {}
273+
void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
274+
Token &FirstToken) override;
275+
};
276+
265277
} // end namespace
266278

267279
void Parser::initializePragmaHandlers() {
@@ -382,6 +394,12 @@ void Parser::initializePragmaHandlers() {
382394
AttributePragmaHandler =
383395
std::make_unique<PragmaAttributeHandler>(AttrFactory);
384396
PP.AddPragmaHandler("clang", AttributePragmaHandler.get());
397+
398+
MaxTokensHerePragmaHandler = std::make_unique<PragmaMaxTokensHereHandler>();
399+
PP.AddPragmaHandler("clang", MaxTokensHerePragmaHandler.get());
400+
401+
MaxTokensTotalPragmaHandler = std::make_unique<PragmaMaxTokensTotalHandler>();
402+
PP.AddPragmaHandler("clang", MaxTokensTotalPragmaHandler.get());
385403
}
386404

387405
void Parser::resetPragmaHandlers() {
@@ -487,6 +505,12 @@ void Parser::resetPragmaHandlers() {
487505

488506
PP.RemovePragmaHandler("clang", AttributePragmaHandler.get());
489507
AttributePragmaHandler.reset();
508+
509+
PP.RemovePragmaHandler("clang", MaxTokensHerePragmaHandler.get());
510+
MaxTokensHerePragmaHandler.reset();
511+
512+
PP.RemovePragmaHandler("clang", MaxTokensTotalPragmaHandler.get());
513+
MaxTokensTotalPragmaHandler.reset();
490514
}
491515

492516
/// Handle the annotation token produced for #pragma unused(...)
@@ -3279,3 +3303,64 @@ void PragmaAttributeHandler::HandlePragma(Preprocessor &PP,
32793303
PP.EnterTokenStream(std::move(TokenArray), 1,
32803304
/*DisableMacroExpansion=*/false, /*IsReinject=*/false);
32813305
}
3306+
3307+
// Handle '#pragma clang max_tokens_here 12345'.
3308+
void PragmaMaxTokensHereHandler::HandlePragma(Preprocessor &PP,
3309+
PragmaIntroducer Introducer,
3310+
Token &Tok) {
3311+
PP.Lex(Tok);
3312+
if (Tok.is(tok::eod)) {
3313+
PP.Diag(Tok.getLocation(), diag::err_pragma_missing_argument)
3314+
<< "clang max_tokens_here" << /*Expected=*/true << "integer";
3315+
return;
3316+
}
3317+
3318+
SourceLocation Loc = Tok.getLocation();
3319+
uint64_t MaxTokens;
3320+
if (Tok.isNot(tok::numeric_constant) ||
3321+
!PP.parseSimpleIntegerLiteral(Tok, MaxTokens)) {
3322+
PP.Diag(Tok.getLocation(), diag::err_pragma_expected_integer)
3323+
<< "clang max_tokens_here";
3324+
return;
3325+
}
3326+
3327+
if (Tok.isNot(tok::eod)) {
3328+
PP.Diag(Tok.getLocation(), diag::warn_pragma_extra_tokens_at_eol)
3329+
<< "clang max_tokens_here";
3330+
return;
3331+
}
3332+
3333+
if (PP.getTokenCount() > MaxTokens) {
3334+
PP.Diag(Loc, diag::warn_max_tokens)
3335+
<< PP.getTokenCount() << (unsigned)MaxTokens;
3336+
}
3337+
}
3338+
3339+
// Handle '#pragma clang max_tokens_total 12345'.
3340+
void PragmaMaxTokensTotalHandler::HandlePragma(Preprocessor &PP,
3341+
PragmaIntroducer Introducer,
3342+
Token &Tok) {
3343+
PP.Lex(Tok);
3344+
if (Tok.is(tok::eod)) {
3345+
PP.Diag(Tok.getLocation(), diag::err_pragma_missing_argument)
3346+
<< "clang max_tokens_total" << /*Expected=*/true << "integer";
3347+
return;
3348+
}
3349+
3350+
SourceLocation Loc = Tok.getLocation();
3351+
uint64_t MaxTokens;
3352+
if (Tok.isNot(tok::numeric_constant) ||
3353+
!PP.parseSimpleIntegerLiteral(Tok, MaxTokens)) {
3354+
PP.Diag(Tok.getLocation(), diag::err_pragma_expected_integer)
3355+
<< "clang max_tokens_total";
3356+
return;
3357+
}
3358+
3359+
if (Tok.isNot(tok::eod)) {
3360+
PP.Diag(Tok.getLocation(), diag::warn_pragma_extra_tokens_at_eol)
3361+
<< "clang max_tokens_total";
3362+
return;
3363+
}
3364+
3365+
PP.overrideMaxTokens(MaxTokens, Loc);
3366+
}

0 commit comments

Comments
 (0)