Skip to content

Commit

Permalink
[clang-tidy] new check: bugprone-signed-char-misuse
Browse files Browse the repository at this point in the history
Summary:
This check searches for signed char -> integer conversions which might
indicate programming error, because of the misinterpretation of char
values. A signed char might store the non-ASCII characters as negative
values. The human programmer probably expects that after an integer
conversion the converted value matches with the character code
(a value from [0..255]), however, the actual value is in
[-128..127] interval.

See also:
STR34-C. Cast characters to unsigned char before converting to larger integer sizes
<https://wiki.sei.cmu.edu/confluence/display/c/STR34-C.+Cast+characters+to+unsigned+char+before+converting+to+larger+integer+sizes>

By now this check is limited to assignment / variable declarations.
If we would catch all signed char -> integer conversion, then it would
produce a lot of findings and also false positives. So I added only
this use case now, but this check can be extended with additional
use cases later.
The CERT documentation mentions another use case when the char is
used for array subscript. Next to that a third use case can be
the signed char - unsigned char comparison, which also a use case
where things happen unexpectedly because of conversion to integer.

Reviewers: alexfh, hokein, aaron.ballman

Reviewed By: aaron.ballman

Subscribers: sylvestre.ledru, whisperity, Eugene.Zelenko, mgorny, xazax.hun, cfe-commits

Tags: #clang, #clang-tools-extra

Differential Revision: https://reviews.llvm.org/D71174
  • Loading branch information
tzolnai committed Jan 6, 2020
1 parent 8eba3fb commit 350da40
Show file tree
Hide file tree
Showing 11 changed files with 458 additions and 0 deletions.
3 changes: 3 additions & 0 deletions clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp
Expand Up @@ -36,6 +36,7 @@
#include "NotNullTerminatedResultCheck.h"
#include "ParentVirtualCallCheck.h"
#include "PosixReturnCheck.h"
#include "SignedCharMisuseCheck.h"
#include "SizeofContainerCheck.h"
#include "SizeofExpressionCheck.h"
#include "StringConstructorCheck.h"
Expand Down Expand Up @@ -119,6 +120,8 @@ class BugproneModule : public ClangTidyModule {
"bugprone-parent-virtual-call");
CheckFactories.registerCheck<PosixReturnCheck>(
"bugprone-posix-return");
CheckFactories.registerCheck<SignedCharMisuseCheck>(
"bugprone-signed-char-misuse");
CheckFactories.registerCheck<SizeofContainerCheck>(
"bugprone-sizeof-container");
CheckFactories.registerCheck<SizeofExpressionCheck>(
Expand Down
1 change: 1 addition & 0 deletions clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt
Expand Up @@ -28,6 +28,7 @@ add_clang_library(clangTidyBugproneModule
NotNullTerminatedResultCheck.cpp
ParentVirtualCallCheck.cpp
PosixReturnCheck.cpp
SignedCharMisuseCheck.cpp
SizeofContainerCheck.cpp
SizeofExpressionCheck.cpp
StringConstructorCheck.cpp
Expand Down
104 changes: 104 additions & 0 deletions clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp
@@ -0,0 +1,104 @@
//===--- SignedCharMisuseCheck.cpp - clang-tidy ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SignedCharMisuseCheck.h"
#include "../utils/OptionsUtils.h"
#include "clang/AST/ASTContext.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"

using namespace clang::ast_matchers;
using namespace clang::ast_matchers::internal;

namespace clang {
namespace tidy {
namespace bugprone {

static Matcher<TypedefDecl> hasAnyListedName(const std::string &Names) {
const std::vector<std::string> NameList =
utils::options::parseStringList(Names);
return hasAnyName(std::vector<StringRef>(NameList.begin(), NameList.end()));
}

SignedCharMisuseCheck::SignedCharMisuseCheck(StringRef Name,
ClangTidyContext *Context)
: ClangTidyCheck(Name, Context),
CharTypdefsToIgnoreList(Options.get("CharTypdefsToIgnore", "")) {}

void SignedCharMisuseCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
Options.store(Opts, "CharTypdefsToIgnore", CharTypdefsToIgnoreList);
}

void SignedCharMisuseCheck::registerMatchers(MatchFinder *Finder) {
// We can ignore typedefs which are some kind of integer types
// (e.g. typedef char sal_Int8). In this case, we don't need to
// worry about the misinterpretation of char values.
const auto IntTypedef = qualType(
hasDeclaration(typedefDecl(hasAnyListedName(CharTypdefsToIgnoreList))));

const auto SignedCharType = expr(hasType(qualType(
allOf(isAnyCharacter(), isSignedInteger(), unless(IntTypedef)))));

const auto IntegerType = qualType(allOf(isInteger(), unless(isAnyCharacter()),
unless(booleanType())))
.bind("integerType");

// We are interested in signed char -> integer conversion.
const auto ImplicitCastExpr =
implicitCastExpr(hasSourceExpression(SignedCharType),
hasImplicitDestinationType(IntegerType))
.bind("castExpression");

const auto CStyleCastExpr = cStyleCastExpr(has(ImplicitCastExpr));
const auto StaticCastExpr = cxxStaticCastExpr(has(ImplicitCastExpr));
const auto FunctionalCastExpr = cxxFunctionalCastExpr(has(ImplicitCastExpr));

// We catch any type of casts to an integer. We need to have these cast
// expressions explicitly to catch only those casts which are direct children
// of an assignment/declaration.
const auto CastExpr = expr(anyOf(ImplicitCastExpr, CStyleCastExpr,
StaticCastExpr, FunctionalCastExpr));

// Catch assignments with the suspicious type conversion.
const auto AssignmentOperatorExpr = expr(binaryOperator(
hasOperatorName("="), hasLHS(hasType(IntegerType)), hasRHS(CastExpr)));

Finder->addMatcher(AssignmentOperatorExpr, this);

// Catch declarations with the suspicious type conversion.
const auto Declaration =
varDecl(isDefinition(), hasType(IntegerType), hasInitializer(CastExpr));

Finder->addMatcher(Declaration, this);
}

void SignedCharMisuseCheck::check(const MatchFinder::MatchResult &Result) {
const auto *CastExpression =
Result.Nodes.getNodeAs<ImplicitCastExpr>("castExpression");
const auto *IntegerType = Result.Nodes.getNodeAs<QualType>("integerType");
assert(CastExpression);
assert(IntegerType);

// Ignore the match if we know that the value is not negative.
// The potential misinterpretation happens for negative values only.
Expr::EvalResult EVResult;
if (!CastExpression->isValueDependent() &&
CastExpression->getSubExpr()->EvaluateAsInt(EVResult, *Result.Context)) {
llvm::APSInt Value1 = EVResult.Val.getInt();
if (Value1.isNonNegative())
return;
}

diag(CastExpression->getBeginLoc(),
"'signed char' to %0 conversion; "
"consider casting to 'unsigned char' first.")
<< *IntegerType;
}

} // namespace bugprone
} // namespace tidy
} // namespace clang
44 changes: 44 additions & 0 deletions clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h
@@ -0,0 +1,44 @@
//===--- SignedCharMisuseCheck.h - clang-tidy -------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SIGNEDCHARMISUSECHECK_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SIGNEDCHARMISUSECHECK_H

#include "../ClangTidyCheck.h"

namespace clang {
namespace tidy {
namespace bugprone {

/// Finds ``signed char`` -> integer conversions which might indicate a programming
/// error. The basic problem with the ``signed char``, that it might store the
/// non-ASCII characters as negative values. The human programmer probably
/// expects that after an integer conversion the converted value matches with the
/// character code (a value from [0..255]), however, the actual value is in
/// [-128..127] interval. This also applies to the plain ``char`` type on
/// those implementations which represent ``char`` similar to ``signed char``.
///
/// For the user-facing documentation see:
/// http://clang.llvm.org/extra/clang-tidy/checks/bugprone-signed-char-misuse.html
class SignedCharMisuseCheck : public ClangTidyCheck {
public:
SignedCharMisuseCheck(StringRef Name, ClangTidyContext *Context);

void storeOptions(ClangTidyOptions::OptionMap &Opts) override;
void registerMatchers(ast_matchers::MatchFinder *Finder) override;
void check(const ast_matchers::MatchFinder::MatchResult &Result) override;

private:
const std::string CharTypdefsToIgnoreList;
};

} // namespace bugprone
} // namespace tidy
} // namespace clang

#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SIGNEDCHARMISUSECHECK_H
6 changes: 6 additions & 0 deletions clang-tools-extra/docs/ReleaseNotes.rst
Expand Up @@ -94,6 +94,12 @@ Improvements to clang-tidy
Without the null terminator it can result in undefined behaviour when the
string is read.

- New :doc:`bugprone-signed-char-misuse
<clang-tidy/checks/bugprone-signed-char-misuse>` check.

Finds ``signed char`` -> integer conversions which might indicate a programming
error.

- New :doc:`cert-mem57-cpp
<clang-tidy/checks/cert-mem57-cpp>` check.

Expand Down
@@ -0,0 +1,76 @@
.. title:: clang-tidy - bugprone-signed-char-misuse

bugprone-signed-char-misuse
===========================

Finds ``signed char`` -> integer conversions which might indicate a programming
error. The basic problem with the ``signed char``, that it might store the
non-ASCII characters as negative values. The human programmer probably
expects that after an integer conversion the converted value matches with the
character code (a value from [0..255]), however, the actual value is in
[-128..127] interval. This also applies to the plain ``char`` type on
those implementations which represent ``char`` similar to ``signed char``.

To avoid this kind of misinterpretation, the desired way of converting from a
``signed char`` to an integer value is converting to ``unsigned char`` first,
which stores all the characters in the positive [0..255] interval which matches
with the known character codes.

It depends on the actual platform whether ``char`` is handled as ``signed char``
by default and so it is caught by this check or not. To change the default behavior
you can use ``-funsigned-char`` and ``-fsigned-char`` compilation options.

Currently, this check is limited to assignments and variable declarations,
where a ``signed char`` is assigned to an integer variable. There are other
use cases where the same misinterpretation might lead to similar bogus
behavior.

See also:
`STR34-C. Cast characters to unsigned char before converting to larger integer sizes
<https://wiki.sei.cmu.edu/confluence/display/c/STR34-C.+Cast+characters+to+unsigned+char+before+converting+to+larger+integer+sizes>`_

A good example from the CERT description when a ``char`` variable is used to
read from a file that might contain non-ASCII characters. The problem comes
up when the code uses the ``-1`` integer value as EOF, while the 255 character
code is also stored as ``-1`` in two's complement form of char type.
See a simple example of this bellow. This code stops not only when it reaches
the end of the file, but also when it gets a character with the 255 code.

.. code-block:: c++

#define EOF (-1)

int read(void) {
char CChar;
int IChar = EOF;

if (readChar(CChar)) {
IChar = CChar;
}
return IChar;
}

A proper way to fix the code above is converting the ``char`` variable to
an ``unsigned char`` value first.

.. code-block:: c++

#define EOF (-1)

int read(void) {
char CChar;
int IChar = EOF;

if (readChar(CChar)) {
IChar = static_cast<unsigned char>(CChar);
}
return IChar;
}

.. option:: CharTypdefsToIgnore

A semicolon-separated list of typedef names. In this list, we can list
typedefs for ``char`` or ``signed char``, which will be ignored by the
check. This is useful when a typedef introduces an integer alias like
``sal_Int8`` or ``int8_t``. In this case, human misinterpretation is not
an issue.
1 change: 1 addition & 0 deletions clang-tools-extra/docs/clang-tidy/checks/list.rst
Expand Up @@ -66,6 +66,7 @@ Clang-Tidy Checks
`bugprone-not-null-terminated-result <bugprone-not-null-terminated-result.html>`_, "Yes"
`bugprone-parent-virtual-call <bugprone-parent-virtual-call.html>`_, "Yes"
`bugprone-posix-return <bugprone-posix-return.html>`_, "Yes"
`bugprone-signed-char-misuse <bugprone-signed-char-misuse.html>`_,
`bugprone-sizeof-container <bugprone-sizeof-container.html>`_,
`bugprone-sizeof-expression <bugprone-sizeof-expression.html>`_,
`bugprone-string-constructor <bugprone-string-constructor.html>`_, "Yes"
Expand Down
@@ -0,0 +1,9 @@
// RUN: %check_clang_tidy %s bugprone-signed-char-misuse %t -- -- -fsigned-char

int PlainChar() {
char CCharacter = -5;
int NCharacter = CCharacter;
// CHECK-MESSAGES: [[@LINE-1]]:20: warning: 'signed char' to 'int' conversion; consider casting to 'unsigned char' first. [bugprone-signed-char-misuse]

return NCharacter;
}
@@ -0,0 +1,17 @@
// RUN: %check_clang_tidy %s bugprone-signed-char-misuse %t -- -- -funsigned-char

// Test framework needs something to catch, otherwise it fails.
int SignedChar() {
signed char CCharacter = -5;
int NCharacter;
NCharacter = CCharacter;
// CHECK-MESSAGES: [[@LINE-1]]:16: warning: 'signed char' to 'int' conversion; consider casting to 'unsigned char' first. [bugprone-signed-char-misuse]

return NCharacter;
}

int PlainChar() {
char CCharacter = -5;
int NCharacter = CCharacter; // no warning
return NCharacter;
}
@@ -0,0 +1,74 @@
// RUN: %check_clang_tidy %s bugprone-signed-char-misuse %t \
// RUN: -config='{CheckOptions: \
// RUN: [{key: bugprone-signed-char-misuse.CharTypdefsToIgnore, value: "sal_Int8;int8_t"}]}' \
// RUN: --

///////////////////////////////////////////////////////////////////
/// Test cases correctly caught by the check.

// Check that a simple test case is still caught.
int SimpleAssignment() {
signed char CCharacter = -5;
int NCharacter;
NCharacter = CCharacter;
// CHECK-MESSAGES: [[@LINE-1]]:16: warning: 'signed char' to 'int' conversion; consider casting to 'unsigned char' first. [bugprone-signed-char-misuse]

return NCharacter;
}

typedef signed char sal_Char;

int TypedefNotInIgnorableList() {
sal_Char CCharacter = 'a';
int NCharacter = CCharacter;
// CHECK-MESSAGES: [[@LINE-1]]:20: warning: 'signed char' to 'int' conversion; consider casting to 'unsigned char' first. [bugprone-signed-char-misuse]

return NCharacter;
}

///////////////////////////////////////////////////////////////////
/// Test cases correctly ignored by the check.

typedef signed char sal_Int8;

int OneIgnorableTypedef() {
sal_Int8 CCharacter = 'a';
int NCharacter = CCharacter;

return NCharacter;
}

typedef signed char int8_t;

int OtherIgnorableTypedef() {
int8_t CCharacter = 'a';
int NCharacter = CCharacter;

return NCharacter;
}

///////////////////////////////////////////////////////////////////
/// Test cases which should be caught by the check.

namespace boost {

template <class T>
class optional {
T *member;

public:
optional(T value) {
member = new T(value);
}

T operator*() { return *member; }
};

} // namespace boost

int DereferenceWithTypdef(boost::optional<sal_Int8> param) {
int NCharacter = *param;
// CHECK-MESSAGES: [[@LINE-1]]:20: warning: 'signed char' to 'int' conversion; consider casting to 'unsigned char' first. [bugprone-signed-char-misuse]

return NCharacter;
}

0 comments on commit 350da40

Please sign in to comment.