Skip to content

Commit

Permalink
[clang-tidy] Reject invalid enum initializers in C files
Browse files Browse the repository at this point in the history
C requires that enum values fit into an int.  Scan the macro tokens
present in an initializing expression and reject macros that contain
tokens that have suffixes making them larger than int.

C forbids the comma operator in enum initializing expressions, so
optionally reject comma operator.

Differential Revision: https://reviews.llvm.org/D125622

Fixes #55467
  • Loading branch information
LegalizeAdulthood committed Jun 2, 2022
1 parent 6eab5ca commit b418ef5
Show file tree
Hide file tree
Showing 6 changed files with 311 additions and 143 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "IntegralLiteralExpressionMatcher.h"

#include <algorithm>
#include <cctype>
#include <stdexcept>

Expand Down Expand Up @@ -81,6 +82,50 @@ bool IntegralLiteralExpressionMatcher::unaryOperator() {
return true;
}

static LiteralSize literalTokenSize(const Token &Tok) {
unsigned int Length = Tok.getLength();
if (Length <= 1)
return LiteralSize::Int;

bool SeenUnsigned = false;
bool SeenLong = false;
bool SeenLongLong = false;
const char *Text = Tok.getLiteralData();
for (unsigned int End = Length - 1; End > 0; --End) {
if (std::isdigit(Text[End]))
break;

if (std::toupper(Text[End]) == 'U')
SeenUnsigned = true;
else if (std::toupper(Text[End]) == 'L') {
if (SeenLong)
SeenLongLong = true;
SeenLong = true;
}
}

if (SeenLongLong) {
if (SeenUnsigned)
return LiteralSize::UnsignedLongLong;

return LiteralSize::LongLong;
}
if (SeenLong) {
if (SeenUnsigned)
return LiteralSize::UnsignedLong;

return LiteralSize::Long;
}
if (SeenUnsigned)
return LiteralSize::UnsignedInt;

return LiteralSize::Int;
}

static bool operator<(LiteralSize LHS, LiteralSize RHS) {
return static_cast<int>(LHS) < static_cast<int>(RHS);
}

bool IntegralLiteralExpressionMatcher::unaryExpr() {
if (!unaryOperator())
return false;
Expand All @@ -102,7 +147,10 @@ bool IntegralLiteralExpressionMatcher::unaryExpr() {
!isIntegralConstant(*Current)) {
return false;
}

LargestSize = std::max(LargestSize, literalTokenSize(*Current));
++Current;

return true;
}

Expand Down Expand Up @@ -217,14 +265,24 @@ bool IntegralLiteralExpressionMatcher::conditionalExpr() {
}

bool IntegralLiteralExpressionMatcher::commaExpr() {
return nonTerminalChainedExpr<tok::TokenKind::comma>(
&IntegralLiteralExpressionMatcher::conditionalExpr);
auto Pred = CommaAllowed
? std::function<bool(Token)>(
[](Token Tok) { return Tok.is(tok::TokenKind::comma); })
: std::function<bool(Token)>([](Token) { return false; });
return nonTerminalChainedExpr(
&IntegralLiteralExpressionMatcher::conditionalExpr, Pred);
}

bool IntegralLiteralExpressionMatcher::expr() { return commaExpr(); }

bool IntegralLiteralExpressionMatcher::match() {
return expr() && Current == End;
// Top-level allowed expression is conditionalExpr(), not expr(), because
// comma operators are only valid initializers when used inside parentheses.
return conditionalExpr() && Current == End;
}

LiteralSize IntegralLiteralExpressionMatcher::largestLiteralSize() const {
return LargestSize;
}

} // namespace modernize
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,27 @@ namespace clang {
namespace tidy {
namespace modernize {

enum class LiteralSize {
Unknown = 0,
Int,
UnsignedInt,
Long,
UnsignedLong,
LongLong,
UnsignedLongLong
};

// Parses an array of tokens and returns true if they conform to the rules of
// C++ for whole expressions involving integral literals. Follows the operator
// precedence rules of C++.
// precedence rules of C++. Optionally exclude comma operator expressions.
class IntegralLiteralExpressionMatcher {
public:
IntegralLiteralExpressionMatcher(ArrayRef<Token> Tokens)
: Current(Tokens.begin()), End(Tokens.end()) {}
IntegralLiteralExpressionMatcher(ArrayRef<Token> Tokens, bool CommaAllowed)
: Current(Tokens.begin()), End(Tokens.end()), CommaAllowed(CommaAllowed) {
}

bool match();
LiteralSize largestLiteralSize() const;

private:
bool advance();
Expand Down Expand Up @@ -64,6 +76,8 @@ class IntegralLiteralExpressionMatcher {

ArrayRef<Token>::iterator Current;
ArrayRef<Token>::iterator End;
LiteralSize LargestSize{LiteralSize::Unknown};
bool CommaAllowed;
};

} // namespace modernize
Expand Down
10 changes: 8 additions & 2 deletions clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -321,8 +321,14 @@ void MacroToEnumCallbacks::FileChanged(SourceLocation Loc,

bool MacroToEnumCallbacks::isInitializer(ArrayRef<Token> MacroTokens)
{
IntegralLiteralExpressionMatcher Matcher(MacroTokens);
return Matcher.match();
IntegralLiteralExpressionMatcher Matcher(MacroTokens, LangOpts.C99 == 0);
bool Matched = Matcher.match();
bool isC = !LangOpts.CPlusPlus;
if (isC && (Matcher.largestLiteralSize() != LiteralSize::Int &&
Matcher.largestLiteralSize() != LiteralSize::UnsignedInt))
return false;

return Matched;
}


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// RUN: %check_clang_tidy %s modernize-macro-to-enum %t

// C requires enum values to fit into an int.
#define TOO_BIG1 1L
#define TOO_BIG2 1UL
#define TOO_BIG3 1LL
#define TOO_BIG4 1ULL

// C forbids comma operator in initializing expressions.
#define BAD_OP 1, 2

#define SIZE_OK1 1
#define SIZE_OK2 1U
// CHECK-MESSAGES: :[[@LINE-2]]:1: warning: replace macro with enum [modernize-macro-to-enum]
// CHECK-MESSAGES: :[[@LINE-3]]:9: warning: macro 'SIZE_OK1' defines an integral constant; prefer an enum instead
// CHECK-MESSAGES: :[[@LINE-3]]:9: warning: macro 'SIZE_OK2' defines an integral constant; prefer an enum instead
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,13 @@
// CHECK-FIXES-NEXT: COMPLEX_PAREN6 = ((+1))
// CHECK-FIXES-NEXT: };

#define GOOD_COMMA (1, 2)
// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: replace macro with enum
// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: macro 'GOOD_COMMA' defines an integral constant; prefer an enum instead
// CHECK-FIXES: enum {
// CHECK-FIXES-NEXT: GOOD_COMMA = (1, 2)
// CHECK-FIXES-NEXT: };

// Macros appearing in conditional expressions can't be replaced
// by enums.
#define USE_FOO 1
Expand Down Expand Up @@ -322,6 +329,9 @@ inline void used_ifndef() {}
#define EPS2 1e5
#define EPS3 1.

// Ignore macros invoking comma operator unless they are inside parens.
#define BAD_COMMA 1, 2

extern void draw(unsigned int Color);

void f()
Expand Down
Loading

0 comments on commit b418ef5

Please sign in to comment.