Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[clang-tidy] new check: bugprone-signed-char-misuse
Summary: This check searches for signed char -> integer conversions which might indicate programming error, because of the misinterpretation of char values. A signed char might store the non-ASCII characters as negative values. The human programmer probably expects that after an integer conversion the converted value matches with the character code (a value from [0..255]), however, the actual value is in [-128..127] interval. See also: STR34-C. Cast characters to unsigned char before converting to larger integer sizes <https://wiki.sei.cmu.edu/confluence/display/c/STR34-C.+Cast+characters+to+unsigned+char+before+converting+to+larger+integer+sizes> By now this check is limited to assignment / variable declarations. If we would catch all signed char -> integer conversion, then it would produce a lot of findings and also false positives. So I added only this use case now, but this check can be extended with additional use cases later. The CERT documentation mentions another use case when the char is used for array subscript. Next to that a third use case can be the signed char - unsigned char comparison, which also a use case where things happen unexpectedly because of conversion to integer. Reviewers: alexfh, hokein, aaron.ballman Reviewed By: aaron.ballman Subscribers: sylvestre.ledru, whisperity, Eugene.Zelenko, mgorny, xazax.hun, cfe-commits Tags: #clang, #clang-tools-extra Differential Revision: https://reviews.llvm.org/D71174
- Loading branch information
Showing
11 changed files
with
458 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
104 changes: 104 additions & 0 deletions
104
clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
//===--- SignedCharMisuseCheck.cpp - clang-tidy ---------------------------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "SignedCharMisuseCheck.h" | ||
#include "../utils/OptionsUtils.h" | ||
#include "clang/AST/ASTContext.h" | ||
#include "clang/ASTMatchers/ASTMatchFinder.h" | ||
|
||
using namespace clang::ast_matchers; | ||
using namespace clang::ast_matchers::internal; | ||
|
||
namespace clang { | ||
namespace tidy { | ||
namespace bugprone { | ||
|
||
static Matcher<TypedefDecl> hasAnyListedName(const std::string &Names) { | ||
const std::vector<std::string> NameList = | ||
utils::options::parseStringList(Names); | ||
return hasAnyName(std::vector<StringRef>(NameList.begin(), NameList.end())); | ||
} | ||
|
||
SignedCharMisuseCheck::SignedCharMisuseCheck(StringRef Name, | ||
ClangTidyContext *Context) | ||
: ClangTidyCheck(Name, Context), | ||
CharTypdefsToIgnoreList(Options.get("CharTypdefsToIgnore", "")) {} | ||
|
||
void SignedCharMisuseCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { | ||
Options.store(Opts, "CharTypdefsToIgnore", CharTypdefsToIgnoreList); | ||
} | ||
|
||
void SignedCharMisuseCheck::registerMatchers(MatchFinder *Finder) { | ||
// We can ignore typedefs which are some kind of integer types | ||
// (e.g. typedef char sal_Int8). In this case, we don't need to | ||
// worry about the misinterpretation of char values. | ||
const auto IntTypedef = qualType( | ||
hasDeclaration(typedefDecl(hasAnyListedName(CharTypdefsToIgnoreList)))); | ||
|
||
const auto SignedCharType = expr(hasType(qualType( | ||
allOf(isAnyCharacter(), isSignedInteger(), unless(IntTypedef))))); | ||
|
||
const auto IntegerType = qualType(allOf(isInteger(), unless(isAnyCharacter()), | ||
unless(booleanType()))) | ||
.bind("integerType"); | ||
|
||
// We are interested in signed char -> integer conversion. | ||
const auto ImplicitCastExpr = | ||
implicitCastExpr(hasSourceExpression(SignedCharType), | ||
hasImplicitDestinationType(IntegerType)) | ||
.bind("castExpression"); | ||
|
||
const auto CStyleCastExpr = cStyleCastExpr(has(ImplicitCastExpr)); | ||
const auto StaticCastExpr = cxxStaticCastExpr(has(ImplicitCastExpr)); | ||
const auto FunctionalCastExpr = cxxFunctionalCastExpr(has(ImplicitCastExpr)); | ||
|
||
// We catch any type of casts to an integer. We need to have these cast | ||
// expressions explicitly to catch only those casts which are direct children | ||
// of an assignment/declaration. | ||
const auto CastExpr = expr(anyOf(ImplicitCastExpr, CStyleCastExpr, | ||
StaticCastExpr, FunctionalCastExpr)); | ||
|
||
// Catch assignments with the suspicious type conversion. | ||
const auto AssignmentOperatorExpr = expr(binaryOperator( | ||
hasOperatorName("="), hasLHS(hasType(IntegerType)), hasRHS(CastExpr))); | ||
|
||
Finder->addMatcher(AssignmentOperatorExpr, this); | ||
|
||
// Catch declarations with the suspicious type conversion. | ||
const auto Declaration = | ||
varDecl(isDefinition(), hasType(IntegerType), hasInitializer(CastExpr)); | ||
|
||
Finder->addMatcher(Declaration, this); | ||
} | ||
|
||
void SignedCharMisuseCheck::check(const MatchFinder::MatchResult &Result) { | ||
const auto *CastExpression = | ||
Result.Nodes.getNodeAs<ImplicitCastExpr>("castExpression"); | ||
const auto *IntegerType = Result.Nodes.getNodeAs<QualType>("integerType"); | ||
assert(CastExpression); | ||
assert(IntegerType); | ||
|
||
// Ignore the match if we know that the value is not negative. | ||
// The potential misinterpretation happens for negative values only. | ||
Expr::EvalResult EVResult; | ||
if (!CastExpression->isValueDependent() && | ||
CastExpression->getSubExpr()->EvaluateAsInt(EVResult, *Result.Context)) { | ||
llvm::APSInt Value1 = EVResult.Val.getInt(); | ||
if (Value1.isNonNegative()) | ||
return; | ||
} | ||
|
||
diag(CastExpression->getBeginLoc(), | ||
"'signed char' to %0 conversion; " | ||
"consider casting to 'unsigned char' first.") | ||
<< *IntegerType; | ||
} | ||
|
||
} // namespace bugprone | ||
} // namespace tidy | ||
} // namespace clang |
44 changes: 44 additions & 0 deletions
44
clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
//===--- SignedCharMisuseCheck.h - clang-tidy -------------------*- C++ -*-===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SIGNEDCHARMISUSECHECK_H | ||
#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SIGNEDCHARMISUSECHECK_H | ||
|
||
#include "../ClangTidyCheck.h" | ||
|
||
namespace clang { | ||
namespace tidy { | ||
namespace bugprone { | ||
|
||
/// Finds ``signed char`` -> integer conversions which might indicate a programming | ||
/// error. The basic problem with the ``signed char``, that it might store the | ||
/// non-ASCII characters as negative values. The human programmer probably | ||
/// expects that after an integer conversion the converted value matches with the | ||
/// character code (a value from [0..255]), however, the actual value is in | ||
/// [-128..127] interval. This also applies to the plain ``char`` type on | ||
/// those implementations which represent ``char`` similar to ``signed char``. | ||
/// | ||
/// For the user-facing documentation see: | ||
/// http://clang.llvm.org/extra/clang-tidy/checks/bugprone-signed-char-misuse.html | ||
class SignedCharMisuseCheck : public ClangTidyCheck { | ||
public: | ||
SignedCharMisuseCheck(StringRef Name, ClangTidyContext *Context); | ||
|
||
void storeOptions(ClangTidyOptions::OptionMap &Opts) override; | ||
void registerMatchers(ast_matchers::MatchFinder *Finder) override; | ||
void check(const ast_matchers::MatchFinder::MatchResult &Result) override; | ||
|
||
private: | ||
const std::string CharTypdefsToIgnoreList; | ||
}; | ||
|
||
} // namespace bugprone | ||
} // namespace tidy | ||
} // namespace clang | ||
|
||
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SIGNEDCHARMISUSECHECK_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
76 changes: 76 additions & 0 deletions
76
clang-tools-extra/docs/clang-tidy/checks/bugprone-signed-char-misuse.rst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
.. title:: clang-tidy - bugprone-signed-char-misuse | ||
|
||
bugprone-signed-char-misuse | ||
=========================== | ||
|
||
Finds ``signed char`` -> integer conversions which might indicate a programming | ||
error. The basic problem with the ``signed char``, that it might store the | ||
non-ASCII characters as negative values. The human programmer probably | ||
expects that after an integer conversion the converted value matches with the | ||
character code (a value from [0..255]), however, the actual value is in | ||
[-128..127] interval. This also applies to the plain ``char`` type on | ||
those implementations which represent ``char`` similar to ``signed char``. | ||
|
||
To avoid this kind of misinterpretation, the desired way of converting from a | ||
``signed char`` to an integer value is converting to ``unsigned char`` first, | ||
which stores all the characters in the positive [0..255] interval which matches | ||
with the known character codes. | ||
|
||
It depends on the actual platform whether ``char`` is handled as ``signed char`` | ||
by default and so it is caught by this check or not. To change the default behavior | ||
you can use ``-funsigned-char`` and ``-fsigned-char`` compilation options. | ||
|
||
Currently, this check is limited to assignments and variable declarations, | ||
where a ``signed char`` is assigned to an integer variable. There are other | ||
use cases where the same misinterpretation might lead to similar bogus | ||
behavior. | ||
|
||
See also: | ||
`STR34-C. Cast characters to unsigned char before converting to larger integer sizes | ||
<https://wiki.sei.cmu.edu/confluence/display/c/STR34-C.+Cast+characters+to+unsigned+char+before+converting+to+larger+integer+sizes>`_ | ||
|
||
A good example from the CERT description when a ``char`` variable is used to | ||
read from a file that might contain non-ASCII characters. The problem comes | ||
up when the code uses the ``-1`` integer value as EOF, while the 255 character | ||
code is also stored as ``-1`` in two's complement form of char type. | ||
See a simple example of this bellow. This code stops not only when it reaches | ||
the end of the file, but also when it gets a character with the 255 code. | ||
|
||
.. code-block:: c++ | ||
|
||
#define EOF (-1) | ||
|
||
int read(void) { | ||
char CChar; | ||
int IChar = EOF; | ||
|
||
if (readChar(CChar)) { | ||
IChar = CChar; | ||
} | ||
return IChar; | ||
} | ||
|
||
A proper way to fix the code above is converting the ``char`` variable to | ||
an ``unsigned char`` value first. | ||
|
||
.. code-block:: c++ | ||
|
||
#define EOF (-1) | ||
|
||
int read(void) { | ||
char CChar; | ||
int IChar = EOF; | ||
|
||
if (readChar(CChar)) { | ||
IChar = static_cast<unsigned char>(CChar); | ||
} | ||
return IChar; | ||
} | ||
|
||
.. option:: CharTypdefsToIgnore | ||
|
||
A semicolon-separated list of typedef names. In this list, we can list | ||
typedefs for ``char`` or ``signed char``, which will be ignored by the | ||
check. This is useful when a typedef introduces an integer alias like | ||
``sal_Int8`` or ``int8_t``. In this case, human misinterpretation is not | ||
an issue. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
9 changes: 9 additions & 0 deletions
9
clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse-fsigned-char.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
// RUN: %check_clang_tidy %s bugprone-signed-char-misuse %t -- -- -fsigned-char | ||
|
||
int PlainChar() { | ||
char CCharacter = -5; | ||
int NCharacter = CCharacter; | ||
// CHECK-MESSAGES: [[@LINE-1]]:20: warning: 'signed char' to 'int' conversion; consider casting to 'unsigned char' first. [bugprone-signed-char-misuse] | ||
|
||
return NCharacter; | ||
} |
17 changes: 17 additions & 0 deletions
17
clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse-funsigned-char.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
// RUN: %check_clang_tidy %s bugprone-signed-char-misuse %t -- -- -funsigned-char | ||
|
||
// Test framework needs something to catch, otherwise it fails. | ||
int SignedChar() { | ||
signed char CCharacter = -5; | ||
int NCharacter; | ||
NCharacter = CCharacter; | ||
// CHECK-MESSAGES: [[@LINE-1]]:16: warning: 'signed char' to 'int' conversion; consider casting to 'unsigned char' first. [bugprone-signed-char-misuse] | ||
|
||
return NCharacter; | ||
} | ||
|
||
int PlainChar() { | ||
char CCharacter = -5; | ||
int NCharacter = CCharacter; // no warning | ||
return NCharacter; | ||
} |
74 changes: 74 additions & 0 deletions
74
clang-tools-extra/test/clang-tidy/checkers/bugprone-signed-char-misuse-with-option.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
// RUN: %check_clang_tidy %s bugprone-signed-char-misuse %t \ | ||
// RUN: -config='{CheckOptions: \ | ||
// RUN: [{key: bugprone-signed-char-misuse.CharTypdefsToIgnore, value: "sal_Int8;int8_t"}]}' \ | ||
// RUN: -- | ||
|
||
/////////////////////////////////////////////////////////////////// | ||
/// Test cases correctly caught by the check. | ||
|
||
// Check that a simple test case is still caught. | ||
int SimpleAssignment() { | ||
signed char CCharacter = -5; | ||
int NCharacter; | ||
NCharacter = CCharacter; | ||
// CHECK-MESSAGES: [[@LINE-1]]:16: warning: 'signed char' to 'int' conversion; consider casting to 'unsigned char' first. [bugprone-signed-char-misuse] | ||
|
||
return NCharacter; | ||
} | ||
|
||
typedef signed char sal_Char; | ||
|
||
int TypedefNotInIgnorableList() { | ||
sal_Char CCharacter = 'a'; | ||
int NCharacter = CCharacter; | ||
// CHECK-MESSAGES: [[@LINE-1]]:20: warning: 'signed char' to 'int' conversion; consider casting to 'unsigned char' first. [bugprone-signed-char-misuse] | ||
|
||
return NCharacter; | ||
} | ||
|
||
/////////////////////////////////////////////////////////////////// | ||
/// Test cases correctly ignored by the check. | ||
|
||
typedef signed char sal_Int8; | ||
|
||
int OneIgnorableTypedef() { | ||
sal_Int8 CCharacter = 'a'; | ||
int NCharacter = CCharacter; | ||
|
||
return NCharacter; | ||
} | ||
|
||
typedef signed char int8_t; | ||
|
||
int OtherIgnorableTypedef() { | ||
int8_t CCharacter = 'a'; | ||
int NCharacter = CCharacter; | ||
|
||
return NCharacter; | ||
} | ||
|
||
/////////////////////////////////////////////////////////////////// | ||
/// Test cases which should be caught by the check. | ||
|
||
namespace boost { | ||
|
||
template <class T> | ||
class optional { | ||
T *member; | ||
|
||
public: | ||
optional(T value) { | ||
member = new T(value); | ||
} | ||
|
||
T operator*() { return *member; } | ||
}; | ||
|
||
} // namespace boost | ||
|
||
int DereferenceWithTypdef(boost::optional<sal_Int8> param) { | ||
int NCharacter = *param; | ||
// CHECK-MESSAGES: [[@LINE-1]]:20: warning: 'signed char' to 'int' conversion; consider casting to 'unsigned char' first. [bugprone-signed-char-misuse] | ||
|
||
return NCharacter; | ||
} |
Oops, something went wrong.