Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,11 @@ def VirtualCallChecker : Checker<"VirtualCall">,

let ParentPackage = CplusplusAlpha in {

def BoundsInformationChecker : Checker<"BoundsInformation">,
HelpText<"Confirms that view objects such as std::span are constructed "
"within the bounds of the source buffer">,
Documentation<NotDocumented>;

def ContainerModeling : Checker<"ContainerModeling">,
HelpText<"Models C++ containers">,
Documentation<NotDocumented>,
Expand Down
199 changes: 199 additions & 0 deletions clang/lib/StaticAnalyzer/Checkers/BoundsInformationChecker.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
//== BoundsInformationChecker.cpp - bounds information checker --*- C++ -*--==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This defines BoundsInformationChecker, a path-sensitive checker that
// checks that the buffer and count arguments are within the bounds of
// the source buffer.
//
//===----------------------------------------------------------------------===//

#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"

using namespace clang;
using namespace ento;

namespace {
class BoundsInformationChecker : public Checker<check::PreCall> {
const BugType BT_DifferentMemRegion{
this, "std::span constructor arguments from different sources",
categories::SecurityError};
const BugType BT_NonConstantSizeArg{
this,
"std::span constructor for std::array has non-constant size argument",
categories::SecurityError};
const BugType BT_OutOfBounds{
this,
"std::span constructor for std::array uses out-of-bounds size argument",
categories::SecurityError};
void reportBug(ExplodedNode *N, const Expr *E, CheckerContext &C,
const BugType &BT, StringRef Msg) const;

public:
void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
};
} // end anonymous namespace

void BoundsInformationChecker::reportBug(ExplodedNode *N, const Expr *E,
CheckerContext &C, const BugType &BT,
StringRef Msg) const {
// Generate a report for this bug.
auto R = std::make_unique<PathSensitiveBugReport>(BT, Msg, N);
if (auto *CE = dyn_cast<CXXConstructExpr>(E)) {
bugreporter::trackExpressionValue(N, CE->getArg(0), *R);
bugreporter::trackExpressionValue(N, CE->getArg(1), *R);
}
C.emitReport(std::move(R));
}

static const MemRegion *GetRegionOrigin(SVal SV) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: we usually start function and method names with a lower-case letter.

const SymExpr *Sym = SV.getAsSymbol(/*IncludeBaseRegions =*/true);
return Sym ? Sym->getOriginRegion() : nullptr;
}

static const ValueDecl *GetExpressionOrigin(const Stmt *STMT) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am wondering if there is a way to get around pattern matching. Is it possible, that the same information could be derived from the memory regions?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes absolutely!!

This code effectively handles SymbolConjured, i.e. the one place where we don't have such information.

But even then, if we simply had access to eg. the argument SVals, the 'this' SVal of the call that produced the SymbolConjured, we'd be in a much better shape.

if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(STMT)) {
if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl()))
return VD;
} else if (const CXXMemberCallExpr *MCE = dyn_cast<CXXMemberCallExpr>(STMT)) {
if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(
MCE->getImplicitObjectArgument()->IgnoreParenCasts())) {
if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl()))
return VD;
} else if (const MemberExpr *ME = dyn_cast<MemberExpr>(
MCE->getImplicitObjectArgument()->IgnoreParenCasts())) {
if (const FieldDecl *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
return FD;
}
} else if (const CXXOperatorCallExpr *OCE =
dyn_cast<CXXOperatorCallExpr>(STMT)) {
if (OCE->getNumArgs() >= 1) {
if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(OCE->getArg(0))) {
if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl()))
return VD;
}
}
} else if (const UnaryOperator *UnaryOp = dyn_cast<UnaryOperator>(STMT)) {
if (const ArraySubscriptExpr *ASExpr =
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will this work properly for nesting? E.g., foo[i][j]. I wonder if this needs to be recursive. Or is this a scenario that we explicitly don't want to support yet?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main problem with nesting is that std::span itself doesn't work properly with nesting. A std::span<std::span<T>> doesn't really represent a view into std::array<std::array<T>>. This is why std::mdspan is a thing, but it's an entire completely different thing.

dyn_cast<ArraySubscriptExpr>(UnaryOp->getSubExpr())) {
if (const DeclRefExpr *DRE =
dyn_cast<DeclRefExpr>(ASExpr->getBase()->IgnoreParenCasts())) {
if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl()))
return VD;
}
}
} else if (const UnaryExprOrTypeTraitExpr *UTExpr =
dyn_cast<UnaryExprOrTypeTraitExpr>(STMT)) {
if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(
UTExpr->getArgumentExpr()->IgnoreParenCasts())) {
if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl()))
return VD;
}
}
return nullptr;
}

static const ValueDecl *GetConjuredSymbolOrigin(SVal SV) {
const SymExpr *Sym = SV.getAsSymbol(/*IncludeBaseRegions =*/true);
if (const SymbolConjured *SCArg = dyn_cast_or_null<SymbolConjured>(Sym)) {
if (const Stmt *STMTArg = SCArg->getStmt())
return GetExpressionOrigin(STMTArg);
}
return nullptr;
}

void BoundsInformationChecker::checkPreCall(const CallEvent &Call,
CheckerContext &C) const {
// Return early if not std::span<IT>(IT, size_t) constructor.
// a. Check if this is a ctor for std::span.
CallDescription CD({CDM::CXXMethod, {"std", "span", "span"}});
if (!CD.matches(Call))
return;
// b. Check if std::span ctor has two arguments.
if (Call.getNumArgs() != 2)
return;
// c. Check if second std::span ctor argument is of type size_t.
if (Call.getArgExpr(1)->getType().getCanonicalType() !=
C.getASTContext().getSizeType())
return;

SVal PointerArg = Call.getArgSVal(0);
SVal SizeArg = Call.getArgSVal(1);

// If buffer and length params are not from the same "source", then report a
// bug.
const MemRegion *MRArg0 = GetRegionOrigin(PointerArg);
const MemRegion *MRArg1 = GetRegionOrigin(SizeArg);
if (MRArg0 && MRArg1 && MRArg0->getBaseRegion() != MRArg1->getBaseRegion()) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This heuristic will probably require the most tweaking. At a glance, the next step is probably to accept code such as

void foo(int *ptr, size_t sz) {
    std::span{ptr, sz};
}

where ptr and sz have different base regions (they're two separate parameters) but it's easy to see that they both belong to the same StackArgumentsSpaceRegion (which confirms not only that they're parameters of the same function, but also that they're arguments of the same call).

// FIXME: Add more logic to filter out valid cases.
if (ExplodedNode *N = C.generateNonFatalErrorNode(C.getState())) {
reportBug(
N, Call.getOriginExpr(), C, BT_DifferentMemRegion,
"Constructor args for std::span are from different memory regions");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think you have any "positive" tests for this warning yet. It's a good idea to have some!

Wording: "memory regions" is our local terminology. Users won't necessarily understand what it means. I think we should say "from different sources" or something like that. Though, of course, this warning will probably change quite a bit as we figure out the specific cases in which we want to emit it. (It may be a good idea to have different warnings for different cases.)

return;
}
}

// Check if value comes from an unknown function call.
const ValueDecl *VDArg0 = GetConjuredSymbolOrigin(PointerArg);
const ValueDecl *VDArg1 = GetConjuredSymbolOrigin(SizeArg);

if (VDArg0) {
// If first argument is std::array.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this check might want to support multiple types (std::array, C array, and maybe even more in the future, like we could have std::vectors with known sizes (if the std::vector is const and we know the size from the ctor), we might want to move this logic of deriving the size to a separate function.

// FIXME: Support C arrays.
if (const auto *CRDecl0 = VDArg0->getType()->getAsCXXRecordDecl()) {
if (CRDecl0->isInStdNamespace() && CRDecl0->getIdentifier() &&
CRDecl0->getName() == "array") {
if (VDArg0 != VDArg1) {
// Check second argument against known size of std::array.
if (SizeArg.isConstant()) {
if (const auto *CTSDecl =
dyn_cast<ClassTemplateSpecializationDecl>(CRDecl0)) {
const TemplateArgumentList &templateArgList =
CTSDecl->getTemplateArgs();
if (templateArgList.size() == 2) {
const TemplateArgument &templateArg1 = templateArgList[1];
if (templateArg1.getKind() ==
TemplateArgument::ArgKind::Integral &&
*SizeArg.getAsInteger() > templateArg1.getAsIntegral()) {
if (ExplodedNode *N =
C.generateNonFatalErrorNode(C.getState())) {
reportBug(N, Call.getOriginExpr(), C, BT_OutOfBounds,
"std::span constructed with overflow length");
return;
}
}
}
}
} else if (ExplodedNode *N =
C.generateNonFatalErrorNode(C.getState())) {
reportBug(N, Call.getOriginExpr(), C, BT_NonConstantSizeArg,
"std::span constructed from std::array with non-constant "
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we sure that it's really a std::array on this branch? I think GetExpressionOrigin() works with raw arrays too.

"length");
return;
}
}
}
}
}
}

void ento::registerBoundsInformationChecker(CheckerManager &mgr) {
mgr.registerChecker<BoundsInformationChecker>();
}

bool ento::shouldRegisterBoundsInformationChecker(const CheckerManager &mgr) {
return true;
}
1 change: 1 addition & 0 deletions clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ add_clang_library(clangStaticAnalyzerCheckers
BitwiseShiftChecker.cpp
BlockInCriticalSectionChecker.cpp
BoolAssignmentChecker.cpp
BoundsInformationChecker.cpp
BuiltinFunctionChecker.cpp
CStringChecker.cpp
CStringSyntaxChecker.cpp
Expand Down
76 changes: 76 additions & 0 deletions clang/test/Analysis/bounds-information-checker.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// RUN: %clang %s -std=c++20 -Xclang -verify --analyze \
// RUN: -Xclang -analyzer-checker=core,alpha.cplusplus.BoundsInformation \
// RUN: -Xclang -analyzer-checker=debug.ExprInspection
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ExprInspection is probably unnecessary. (It's a checker built for the purposes of debugging/testing the engine, mostly. It reacts on magic function calls such as clang_analyzer_eval(). You don't have any of those magic calls in the test. See also https://clang.llvm.org/docs/analyzer/developer-docs/DebugChecks.html)


#include "std-span-system-header.h"

//----------------------------------------------------------------------------//
// std::span - no issue
//----------------------------------------------------------------------------//

void stdSpanFromCArrayNoIssue() {
char buffer[4] = { '3', '.', '1', '4' };

(void)std::span { buffer }; // no-warning
(void)std::span<char> { buffer, sizeof(buffer) }; // no-warning
(void)std::span<char> { &buffer[0], sizeof(buffer) }; // no-warning

char *ptr = buffer;
size_t size = sizeof(buffer);
(void)std::span<char> { ptr, size }; // no-warning
}

void stdSpanFromStdArrayNoIssue() {
std::array<char, 124> buffer { '3', '.', '1', '4' };

(void)std::span { buffer }.first(4); // no-warning
(void)std::span<char> { buffer.data(), buffer.size() }.first(4); // no-warning
(void)std::span<char> { &buffer[0], buffer.size() }.first(4); // no-warning

char *ptr = buffer.data();
size_t size = buffer.size();
(void)std::span<char> { ptr, size }.first(4); // no-warning
}

void stdSpanFromStaticCArray() {
static const uint8_t prefixDeltaFrame[6] = { 0x00, 0x00, 0x00, 0x01, 0x21, 0xe0 };
(void)std::span<const uint8_t> { prefixDeltaFrame, sizeof(prefixDeltaFrame) }; // no-warning
(void)std::span<const uint8_t> { &prefixDeltaFrame[0], sizeof(prefixDeltaFrame) }; // no-warning
}

// No issue because arguments from the same memory region.
// FIXME: This works locally, but not in actual WebKit code.
class MappedFileData {
public:
unsigned size() const { return m_fileSize; }
std::span<const uint8_t> span() const { return { static_cast<const uint8_t*>(m_fileData), size() }; } // no-warning
std::span<uint8_t> mutableSpan() { return { static_cast<uint8_t*>(m_fileData), size() }; } // no-warning

private:
void* m_fileData { nullptr };
unsigned m_fileSize { 0 };
};

//----------------------------------------------------------------------------//
// std::span - warnings
//----------------------------------------------------------------------------//

void stdSpanFromStdArrayWarnings() {
std::array<char, 124> buffer { '3', '.', '1', '4' };
(void)std::span<char> { &buffer[0], 4 };
(void)std::span<char> { buffer.data(), 4 };
}

void stdSpanFromStdArrayOutOfBounds() {
std::array<char, 4> buffer { '3', '.', '1', '4' };
(void)std::span<char> { &buffer[0], 5 }; // expected-warning {{std::span constructed with overflow length}}
(void)std::span<char> { buffer.data(), 5 }; // expected-warning {{std::span constructed with overflow length}}
}

struct HexNumberBuffer {
std::array<char, 16> buffer;
unsigned length;

const char* characters() const { return &*(buffer.end() - length); }
std::span<const char> span() const { return { characters(), length }; } // expected-warning {{std::span constructed from std::array with non-constant length}}
};
78 changes: 78 additions & 0 deletions clang/test/Analysis/std-span-system-header.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#pragma clang system_header

#include "Inputs/system-header-simulator-cxx.h"

namespace std {

template <class _Tp, size_t _Size>
struct array {
typedef array __self;
typedef _Tp value_type;
typedef value_type& reference;
typedef const value_type& const_reference;
typedef value_type* iterator;
typedef const value_type* const_iterator;
typedef value_type* pointer;
typedef const value_type* const_pointer;
typedef size_t size_type;

_Tp __elems_[_Size];

reference operator[](size_type __n);
const_reference operator[](size_type __n) const;

value_type* data();
const value_type* data() const;

size_type size() const;

iterator begin();
const_iterator begin() const;
iterator end();
const_iterator end() const;
};

inline constexpr size_t dynamic_extent = (size_t)-1;

template <typename _Tp, size_t _Extent = dynamic_extent>
class span {
public:
using element_type = _Tp;
using value_type = remove_cv_t<_Tp>;
using size_type = size_t;
using pointer = _Tp *;

template <size_t _Sz = _Extent> requires(_Sz == 0)
constexpr span();

constexpr span(const span&);

template <typename _It>
constexpr span(_It* __first, size_type __count);

template <size_t _Sz>
constexpr span(element_type (&__arr)[_Sz]);

template <class _OtherElementType>
constexpr span(array<_OtherElementType, _Extent>& __arr);

template <class _OtherElementType>
constexpr span(const array<_OtherElementType, _Extent>& __arr);

constexpr pointer data() const;

constexpr size_type size() const;
constexpr size_type size_bytes() const;

constexpr span<element_type, dynamic_extent> first(size_type __count) const;
};

template<class _Tp, size_t _Sz>
span(_Tp (&)[_Sz]) -> span<_Tp, _Sz>;

template<class _Tp, size_t _Sz>
span(array<_Tp, _Sz>&) -> span<_Tp, _Sz>;

template<class _Tp, size_t _Sz>
span(const array<_Tp, _Sz>&) -> span<const _Tp, _Sz>;
}
Loading