Skip to content

Commit

Permalink
[clang-tidy] Add bugprone-suspicious-stringview-data-usage check (#83716
Browse files Browse the repository at this point in the history
)

This check identifies suspicious usages of std::string_view::data() that
could lead to reading out-of-bounds data due to inadequate or incorrect
string null termination.

Closes #80854
  • Loading branch information
PiotrZSL committed Mar 19, 2024
1 parent 0a40f5d commit 28c1279
Show file tree
Hide file tree
Showing 9 changed files with 268 additions and 0 deletions.
3 changes: 3 additions & 0 deletions clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp
Expand Up @@ -73,6 +73,7 @@
#include "SuspiciousReallocUsageCheck.h"
#include "SuspiciousSemicolonCheck.h"
#include "SuspiciousStringCompareCheck.h"
#include "SuspiciousStringviewDataUsageCheck.h"
#include "SwappedArgumentsCheck.h"
#include "SwitchMissingDefaultCaseCheck.h"
#include "TerminatingContinueCheck.h"
Expand Down Expand Up @@ -218,6 +219,8 @@ class BugproneModule : public ClangTidyModule {
"bugprone-suspicious-semicolon");
CheckFactories.registerCheck<SuspiciousStringCompareCheck>(
"bugprone-suspicious-string-compare");
CheckFactories.registerCheck<SuspiciousStringviewDataUsageCheck>(
"bugprone-suspicious-stringview-data-usage");
CheckFactories.registerCheck<SwappedArgumentsCheck>(
"bugprone-swapped-arguments");
CheckFactories.registerCheck<TerminatingContinueCheck>(
Expand Down
1 change: 1 addition & 0 deletions clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt
Expand Up @@ -26,6 +26,7 @@ add_clang_library(clangTidyBugproneModule
ImplicitWideningOfMultiplicationResultCheck.cpp
InaccurateEraseCheck.cpp
IncorrectEnableIfCheck.cpp
SuspiciousStringviewDataUsageCheck.cpp
SwitchMissingDefaultCaseCheck.cpp
IncDecInConditionsCheck.cpp
IncorrectRoundingsCheck.cpp
Expand Down
@@ -0,0 +1,97 @@
//===--- SuspiciousStringviewDataUsageCheck.cpp - clang-tidy --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SuspiciousStringviewDataUsageCheck.h"
#include "../utils/Matchers.h"
#include "../utils/OptionsUtils.h"
#include "clang/AST/ASTContext.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"

using namespace clang::ast_matchers;

namespace clang::tidy::bugprone {

SuspiciousStringviewDataUsageCheck::SuspiciousStringviewDataUsageCheck(
StringRef Name, ClangTidyContext *Context)
: ClangTidyCheck(Name, Context),
StringViewTypes(utils::options::parseStringList(Options.get(
"StringViewTypes", "::std::basic_string_view;::llvm::StringRef"))),
AllowedCallees(
utils::options::parseStringList(Options.get("AllowedCallees", ""))) {}

void SuspiciousStringviewDataUsageCheck::storeOptions(
ClangTidyOptions::OptionMap &Opts) {
Options.store(Opts, "StringViewTypes",
utils::options::serializeStringList(StringViewTypes));
Options.store(Opts, "AllowedCallees",
utils::options::serializeStringList(AllowedCallees));
}

bool SuspiciousStringviewDataUsageCheck::isLanguageVersionSupported(
const LangOptions &LangOpts) const {
return LangOpts.CPlusPlus;
}

std::optional<TraversalKind>
SuspiciousStringviewDataUsageCheck::getCheckTraversalKind() const {
return TK_AsIs;
}

void SuspiciousStringviewDataUsageCheck::registerMatchers(MatchFinder *Finder) {

auto AncestorCall = anyOf(
cxxConstructExpr(), callExpr(unless(cxxOperatorCallExpr())), lambdaExpr(),
initListExpr(
hasType(qualType(hasCanonicalType(hasDeclaration(recordDecl()))))));

auto DataMethod =
cxxMethodDecl(hasName("data"),
ofClass(matchers::matchesAnyListedName(StringViewTypes)));

auto SizeCall = cxxMemberCallExpr(
callee(cxxMethodDecl(hasAnyName("size", "length"))),
on(ignoringParenImpCasts(
matchers::isStatementIdenticalToBoundNode("self"))));

auto DescendantSizeCall = expr(hasDescendant(
expr(SizeCall, hasAncestor(expr(AncestorCall).bind("ancestor-size")),
hasAncestor(expr(equalsBoundNode("parent"),
equalsBoundNode("ancestor-size"))))));

Finder->addMatcher(
cxxMemberCallExpr(
on(ignoringParenImpCasts(expr().bind("self"))), callee(DataMethod),
expr().bind("data-call"),
hasParent(expr(anyOf(
invocation(
expr().bind("parent"), unless(cxxOperatorCallExpr()),
hasAnyArgument(
ignoringParenImpCasts(equalsBoundNode("data-call"))),
unless(hasAnyArgument(ignoringParenImpCasts(SizeCall))),
unless(hasAnyArgument(DescendantSizeCall)),
hasDeclaration(namedDecl(
unless(matchers::matchesAnyListedName(AllowedCallees))))),
initListExpr(expr().bind("parent"),
hasType(qualType(hasCanonicalType(hasDeclaration(
recordDecl(unless(matchers::matchesAnyListedName(
AllowedCallees))))))),
unless(DescendantSizeCall)))))),
this);
}

void SuspiciousStringviewDataUsageCheck::check(
const MatchFinder::MatchResult &Result) {
const auto *DataCallExpr =
Result.Nodes.getNodeAs<CXXMemberCallExpr>("data-call");
diag(DataCallExpr->getExprLoc(),
"result of a `data()` call may not be null terminated, provide size "
"information to the callee to prevent potential issues")
<< DataCallExpr->getCallee()->getSourceRange();
}

} // namespace clang::tidy::bugprone
@@ -0,0 +1,38 @@
//===--- SuspiciousStringviewDataUsageCheck.h - clang-tidy -------//C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SUSPICIOUSSTRINGVIEWDATAUSAGECHECK_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SUSPICIOUSSTRINGVIEWDATAUSAGECHECK_H

#include "../ClangTidyCheck.h"

namespace clang::tidy::bugprone {

/// Identifies suspicious usages of std::string_view::data() that could lead to
/// reading out-of-bounds data due to inadequate or incorrect string null
/// termination.
///
/// For the user-facing documentation see:
/// http://clang.llvm.org/extra/clang-tidy/checks/bugprone/suspicious-stringview-data-usage.html
class SuspiciousStringviewDataUsageCheck : public ClangTidyCheck {
public:
SuspiciousStringviewDataUsageCheck(StringRef Name, ClangTidyContext *Context);
void registerMatchers(ast_matchers::MatchFinder *Finder) override;
void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
void storeOptions(ClangTidyOptions::OptionMap &Opts) override;
bool isLanguageVersionSupported(const LangOptions &LangOpts) const override;
std::optional<TraversalKind> getCheckTraversalKind() const override;

private:
std::vector<llvm::StringRef> StringViewTypes;
std::vector<llvm::StringRef> AllowedCallees;
};

} // namespace clang::tidy::bugprone

#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SUSPICIOUSSTRINGVIEWDATAUSAGECHECK_H
7 changes: 7 additions & 0 deletions clang-tools-extra/docs/ReleaseNotes.rst
Expand Up @@ -110,6 +110,13 @@ New checks
Detects error-prone Curiously Recurring Template Pattern usage, when the CRTP
can be constructed outside itself and the derived class.

- New :doc:`bugprone-suspicious-stringview-data-usage
<clang-tidy/checks/bugprone/suspicious-stringview-data-usage>` check.

Identifies suspicious usages of ``std::string_view::data()`` that could lead
to reading out-of-bounds data due to inadequate or incorrect string null
termination.

- New :doc:`modernize-use-designated-initializers
<clang-tidy/checks/modernize/use-designated-initializers>` check.

Expand Down
@@ -0,0 +1,58 @@
.. title:: clang-tidy - bugprone-suspicious-stringview-data-usage

bugprone-suspicious-stringview-data-usage
=========================================

Identifies suspicious usages of ``std::string_view::data()`` that could lead to
reading out-of-bounds data due to inadequate or incorrect string null
termination.

It warns when the result of ``data()`` is passed to a constructor or function
without also passing the corresponding result of ``size()`` or ``length()``
member function. Such usage can lead to unintended behavior, particularly when
assuming the data pointed to by ``data()`` is null-terminated.

The absence of a ``c_str()`` method in ``std::string_view`` often leads
developers to use ``data()`` as a substitute, especially when interfacing with
C APIs that expect null-terminated strings. However, since ``data()`` does not
guarantee null termination, this can result in unintended behavior if the API
relies on proper null termination for correct string interpretation.

In today's programming landscape, this scenario can occur when implicitly
converting an ``std::string_view`` to an ``std::string``. Since the constructor
in ``std::string`` designed for string-view-like objects is ``explicit``,
attempting to pass an ``std::string_view`` to a function expecting an
``std::string`` will result in a compilation error. As a workaround, developers
may be tempted to utilize the ``.data()`` method to achieve compilation,
introducing potential risks.

For instance:

.. code-block:: c++

void printString(const std::string& str) {
std::cout << "String: " << str << std::endl;
}

void something(std::string_view sv) {
printString(sv.data());
}

In this example, directly passing ``sv`` to the ``printString`` function would
lead to a compilation error due to the explicit nature of the ``std::string``
constructor. Consequently, developers might opt for ``sv.data()`` to resolve the
compilation error, albeit introducing potential hazards as discussed.

.. option:: StringViewTypes

Option allows users to specify custom string view-like types for analysis. It
accepts a semicolon-separated list of type names or regular expressions
matching these types. Default value is:
`::std::basic_string_view;::llvm::StringRef`.

.. option:: AllowedCallees

Specifies methods, functions, or classes where the result of ``.data()`` is
passed to. Allows to exclude such calls from the analysis. Accepts a
semicolon-separated list of names or regular expressions matching these
entities. Default value is: empty string.
1 change: 1 addition & 0 deletions clang-tools-extra/docs/clang-tidy/checks/list.rst
Expand Up @@ -139,6 +139,7 @@ Clang-Tidy Checks
:doc:`bugprone-suspicious-realloc-usage <bugprone/suspicious-realloc-usage>`,
:doc:`bugprone-suspicious-semicolon <bugprone/suspicious-semicolon>`, "Yes"
:doc:`bugprone-suspicious-string-compare <bugprone/suspicious-string-compare>`, "Yes"
:doc:`bugprone-suspicious-stringview-data-usage <bugprone/suspicious-stringview-data-usage>`,
:doc:`bugprone-swapped-arguments <bugprone/swapped-arguments>`, "Yes"
:doc:`bugprone-switch-missing-default-case <bugprone/switch-missing-default-case>`,
:doc:`bugprone-terminating-continue <bugprone/terminating-continue>`, "Yes"
Expand Down
Expand Up @@ -24,6 +24,7 @@ struct basic_string {
basic_string();
basic_string(const C *p, const A &a = A());
basic_string(const C *p, size_type count);
basic_string(const C *b, const C *e);

~basic_string();

Expand Down Expand Up @@ -85,6 +86,12 @@ struct basic_string_view {
const C *str;
constexpr basic_string_view(const C* s) : str(s) {}

const C *data() const;

bool empty() const;
size_type size() const;
size_type length() const;

size_type find(_Type v, size_type pos = 0) const;
size_type find(C ch, size_type pos = 0) const;
size_type find(const C* s, size_type pos, size_type count) const;
Expand Down
@@ -0,0 +1,56 @@
// RUN: %check_clang_tidy -std=c++17-or-later %s bugprone-suspicious-stringview-data-usage %t -- -- -isystem %clang_tidy_headers
#include <string>

struct View {
const char* str;
};

struct Pair {
const char* begin;
const char* end;
};

struct ViewWithSize {
const char* str;
std::string_view::size_type size;
};

void something(const char*);
void something(const char*, unsigned);
void something(const char*, unsigned, const char*);
void something_str(std::string, unsigned);

void invalid(std::string_view sv, std::string_view sv2) {
std::string s(sv.data());
// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: result of a `data()` call may not be null terminated, provide size information to the callee to prevent potential issues
std::string si{sv.data()};
// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: result of a `data()` call may not be null terminated, provide size information to the callee to prevent potential issues
std::string_view s2(sv.data());
// CHECK-MESSAGES: :[[@LINE-1]]:26: warning: result of a `data()` call may not be null terminated, provide size information to the callee to prevent potential issues
something(sv.data());
// CHECK-MESSAGES: :[[@LINE-1]]:16: warning: result of a `data()` call may not be null terminated, provide size information to the callee to prevent potential issues
something(sv.data(), sv.size(), sv2.data());
// CHECK-MESSAGES: :[[@LINE-1]]:39: warning: result of a `data()` call may not be null terminated, provide size information to the callee to prevent potential issues
something_str(sv.data(), sv.size());
// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: result of a `data()` call may not be null terminated, provide size information to the callee to prevent potential issues
View view{sv.data()};
// CHECK-MESSAGES: :[[@LINE-1]]:16: warning: result of a `data()` call may not be null terminated, provide size information to the callee to prevent potential issues
}

void valid(std::string_view sv) {
std::string s1(sv.data(), sv.data() + sv.size());
std::string s2(sv.data(), sv.data() + sv.length());
std::string s3(sv.data(), sv.size() + sv.data());
std::string s4(sv.data(), sv.length() + sv.data());
std::string s5(sv.data(), sv.size());
std::string s6(sv.data(), sv.length());
something(sv.data(), sv.size());
something(sv.data(), sv.length());
ViewWithSize view1{sv.data(), sv.size()};
ViewWithSize view2{sv.data(), sv.length()};
Pair view3{sv.data(), sv.data() + sv.size()};
Pair view4{sv.data(), sv.data() + sv.length()};
Pair view5{sv.data(), sv.size() + sv.data()};
Pair view6{sv.data(), sv.length() + sv.data()};
const char* str{sv.data()};
}

0 comments on commit 28c1279

Please sign in to comment.