-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[analyzer] Add alpha.cplusplus.BoundsInformation checker #112784
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,199 @@ | ||
//== BoundsInformationChecker.cpp - bounds information checker --*- C++ -*--==// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// This defines BoundsInformationChecker, a path-sensitive checker that | ||
// checks that the buffer and count arguments are within the bounds of | ||
// the source buffer. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" | ||
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" | ||
#include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h" | ||
#include "clang/StaticAnalyzer/Core/Checker.h" | ||
#include "clang/StaticAnalyzer/Core/CheckerManager.h" | ||
#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" | ||
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" | ||
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" | ||
#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" | ||
|
||
using namespace clang; | ||
using namespace ento; | ||
|
||
namespace { | ||
class BoundsInformationChecker : public Checker<check::PreCall> { | ||
const BugType BT_DifferentMemRegion{ | ||
this, "std::span constructor arguments from different sources", | ||
categories::SecurityError}; | ||
const BugType BT_NonConstantSizeArg{ | ||
this, | ||
"std::span constructor for std::array has non-constant size argument", | ||
categories::SecurityError}; | ||
const BugType BT_OutOfBounds{ | ||
this, | ||
"std::span constructor for std::array uses out-of-bounds size argument", | ||
categories::SecurityError}; | ||
void reportBug(ExplodedNode *N, const Expr *E, CheckerContext &C, | ||
const BugType &BT, StringRef Msg) const; | ||
|
||
public: | ||
void checkPreCall(const CallEvent &Call, CheckerContext &C) const; | ||
}; | ||
} // end anonymous namespace | ||
|
||
void BoundsInformationChecker::reportBug(ExplodedNode *N, const Expr *E, | ||
CheckerContext &C, const BugType &BT, | ||
StringRef Msg) const { | ||
// Generate a report for this bug. | ||
auto R = std::make_unique<PathSensitiveBugReport>(BT, Msg, N); | ||
if (auto *CE = dyn_cast<CXXConstructExpr>(E)) { | ||
bugreporter::trackExpressionValue(N, CE->getArg(0), *R); | ||
bugreporter::trackExpressionValue(N, CE->getArg(1), *R); | ||
} | ||
C.emitReport(std::move(R)); | ||
} | ||
|
||
static const MemRegion *GetRegionOrigin(SVal SV) { | ||
const SymExpr *Sym = SV.getAsSymbol(/*IncludeBaseRegions =*/true); | ||
return Sym ? Sym->getOriginRegion() : nullptr; | ||
} | ||
|
||
static const ValueDecl *GetExpressionOrigin(const Stmt *STMT) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am wondering if there is a way to get around pattern matching. Is it possible, that the same information could be derived from the memory regions? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes absolutely!! This code effectively handles But even then, if we simply had access to eg. the argument |
||
if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(STMT)) { | ||
if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl())) | ||
return VD; | ||
} else if (const CXXMemberCallExpr *MCE = dyn_cast<CXXMemberCallExpr>(STMT)) { | ||
if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>( | ||
MCE->getImplicitObjectArgument()->IgnoreParenCasts())) { | ||
if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl())) | ||
return VD; | ||
} else if (const MemberExpr *ME = dyn_cast<MemberExpr>( | ||
MCE->getImplicitObjectArgument()->IgnoreParenCasts())) { | ||
if (const FieldDecl *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) | ||
return FD; | ||
} | ||
} else if (const CXXOperatorCallExpr *OCE = | ||
dyn_cast<CXXOperatorCallExpr>(STMT)) { | ||
if (OCE->getNumArgs() >= 1) { | ||
if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(OCE->getArg(0))) { | ||
if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl())) | ||
return VD; | ||
} | ||
} | ||
} else if (const UnaryOperator *UnaryOp = dyn_cast<UnaryOperator>(STMT)) { | ||
if (const ArraySubscriptExpr *ASExpr = | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will this work properly for nesting? E.g., There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The main problem with nesting is that |
||
dyn_cast<ArraySubscriptExpr>(UnaryOp->getSubExpr())) { | ||
if (const DeclRefExpr *DRE = | ||
dyn_cast<DeclRefExpr>(ASExpr->getBase()->IgnoreParenCasts())) { | ||
if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl())) | ||
return VD; | ||
} | ||
} | ||
} else if (const UnaryExprOrTypeTraitExpr *UTExpr = | ||
dyn_cast<UnaryExprOrTypeTraitExpr>(STMT)) { | ||
if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>( | ||
UTExpr->getArgumentExpr()->IgnoreParenCasts())) { | ||
if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl())) | ||
return VD; | ||
} | ||
} | ||
return nullptr; | ||
} | ||
|
||
static const ValueDecl *GetConjuredSymbolOrigin(SVal SV) { | ||
const SymExpr *Sym = SV.getAsSymbol(/*IncludeBaseRegions =*/true); | ||
if (const SymbolConjured *SCArg = dyn_cast_or_null<SymbolConjured>(Sym)) { | ||
if (const Stmt *STMTArg = SCArg->getStmt()) | ||
return GetExpressionOrigin(STMTArg); | ||
} | ||
return nullptr; | ||
} | ||
|
||
void BoundsInformationChecker::checkPreCall(const CallEvent &Call, | ||
CheckerContext &C) const { | ||
// Return early if not std::span<IT>(IT, size_t) constructor. | ||
// a. Check if this is a ctor for std::span. | ||
CallDescription CD({CDM::CXXMethod, {"std", "span", "span"}}); | ||
if (!CD.matches(Call)) | ||
return; | ||
// b. Check if std::span ctor has two arguments. | ||
if (Call.getNumArgs() != 2) | ||
return; | ||
// c. Check if second std::span ctor argument is of type size_t. | ||
if (Call.getArgExpr(1)->getType().getCanonicalType() != | ||
C.getASTContext().getSizeType()) | ||
return; | ||
|
||
SVal PointerArg = Call.getArgSVal(0); | ||
SVal SizeArg = Call.getArgSVal(1); | ||
|
||
// If buffer and length params are not from the same "source", then report a | ||
// bug. | ||
const MemRegion *MRArg0 = GetRegionOrigin(PointerArg); | ||
const MemRegion *MRArg1 = GetRegionOrigin(SizeArg); | ||
if (MRArg0 && MRArg1 && MRArg0->getBaseRegion() != MRArg1->getBaseRegion()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This heuristic will probably require the most tweaking. At a glance, the next step is probably to accept code such as void foo(int *ptr, size_t sz) {
std::span{ptr, sz};
} where |
||
// FIXME: Add more logic to filter out valid cases. | ||
if (ExplodedNode *N = C.generateNonFatalErrorNode(C.getState())) { | ||
reportBug( | ||
N, Call.getOriginExpr(), C, BT_DifferentMemRegion, | ||
"Constructor args for std::span are from different memory regions"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think you have any "positive" tests for this warning yet. It's a good idea to have some! Wording: "memory regions" is our local terminology. Users won't necessarily understand what it means. I think we should say "from different sources" or something like that. Though, of course, this warning will probably change quite a bit as we figure out the specific cases in which we want to emit it. (It may be a good idea to have different warnings for different cases.) |
||
return; | ||
} | ||
} | ||
|
||
// Check if value comes from an unknown function call. | ||
const ValueDecl *VDArg0 = GetConjuredSymbolOrigin(PointerArg); | ||
const ValueDecl *VDArg1 = GetConjuredSymbolOrigin(SizeArg); | ||
|
||
if (VDArg0) { | ||
// If first argument is std::array. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since this check might want to support multiple types ( |
||
// FIXME: Support C arrays. | ||
if (const auto *CRDecl0 = VDArg0->getType()->getAsCXXRecordDecl()) { | ||
if (CRDecl0->isInStdNamespace() && CRDecl0->getIdentifier() && | ||
CRDecl0->getName() == "array") { | ||
if (VDArg0 != VDArg1) { | ||
// Check second argument against known size of std::array. | ||
if (SizeArg.isConstant()) { | ||
if (const auto *CTSDecl = | ||
dyn_cast<ClassTemplateSpecializationDecl>(CRDecl0)) { | ||
const TemplateArgumentList &templateArgList = | ||
CTSDecl->getTemplateArgs(); | ||
if (templateArgList.size() == 2) { | ||
const TemplateArgument &templateArg1 = templateArgList[1]; | ||
if (templateArg1.getKind() == | ||
TemplateArgument::ArgKind::Integral && | ||
*SizeArg.getAsInteger() > templateArg1.getAsIntegral()) { | ||
if (ExplodedNode *N = | ||
C.generateNonFatalErrorNode(C.getState())) { | ||
reportBug(N, Call.getOriginExpr(), C, BT_OutOfBounds, | ||
"std::span constructed with overflow length"); | ||
return; | ||
} | ||
} | ||
} | ||
} | ||
} else if (ExplodedNode *N = | ||
C.generateNonFatalErrorNode(C.getState())) { | ||
reportBug(N, Call.getOriginExpr(), C, BT_NonConstantSizeArg, | ||
"std::span constructed from std::array with non-constant " | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are we sure that it's really a |
||
"length"); | ||
return; | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
void ento::registerBoundsInformationChecker(CheckerManager &mgr) { | ||
mgr.registerChecker<BoundsInformationChecker>(); | ||
} | ||
|
||
bool ento::shouldRegisterBoundsInformationChecker(const CheckerManager &mgr) { | ||
return true; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
// RUN: %clang %s -std=c++20 -Xclang -verify --analyze \ | ||
// RUN: -Xclang -analyzer-checker=core,alpha.cplusplus.BoundsInformation \ | ||
// RUN: -Xclang -analyzer-checker=debug.ExprInspection | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
#include "std-span-system-header.h" | ||
|
||
//----------------------------------------------------------------------------// | ||
// std::span - no issue | ||
//----------------------------------------------------------------------------// | ||
|
||
void stdSpanFromCArrayNoIssue() { | ||
char buffer[4] = { '3', '.', '1', '4' }; | ||
|
||
(void)std::span { buffer }; // no-warning | ||
(void)std::span<char> { buffer, sizeof(buffer) }; // no-warning | ||
(void)std::span<char> { &buffer[0], sizeof(buffer) }; // no-warning | ||
|
||
char *ptr = buffer; | ||
size_t size = sizeof(buffer); | ||
(void)std::span<char> { ptr, size }; // no-warning | ||
} | ||
|
||
void stdSpanFromStdArrayNoIssue() { | ||
std::array<char, 124> buffer { '3', '.', '1', '4' }; | ||
|
||
(void)std::span { buffer }.first(4); // no-warning | ||
(void)std::span<char> { buffer.data(), buffer.size() }.first(4); // no-warning | ||
(void)std::span<char> { &buffer[0], buffer.size() }.first(4); // no-warning | ||
|
||
char *ptr = buffer.data(); | ||
size_t size = buffer.size(); | ||
(void)std::span<char> { ptr, size }.first(4); // no-warning | ||
} | ||
|
||
void stdSpanFromStaticCArray() { | ||
static const uint8_t prefixDeltaFrame[6] = { 0x00, 0x00, 0x00, 0x01, 0x21, 0xe0 }; | ||
(void)std::span<const uint8_t> { prefixDeltaFrame, sizeof(prefixDeltaFrame) }; // no-warning | ||
(void)std::span<const uint8_t> { &prefixDeltaFrame[0], sizeof(prefixDeltaFrame) }; // no-warning | ||
} | ||
|
||
// No issue because arguments from the same memory region. | ||
// FIXME: This works locally, but not in actual WebKit code. | ||
class MappedFileData { | ||
public: | ||
unsigned size() const { return m_fileSize; } | ||
std::span<const uint8_t> span() const { return { static_cast<const uint8_t*>(m_fileData), size() }; } // no-warning | ||
std::span<uint8_t> mutableSpan() { return { static_cast<uint8_t*>(m_fileData), size() }; } // no-warning | ||
|
||
private: | ||
void* m_fileData { nullptr }; | ||
unsigned m_fileSize { 0 }; | ||
}; | ||
|
||
//----------------------------------------------------------------------------// | ||
// std::span - warnings | ||
//----------------------------------------------------------------------------// | ||
|
||
void stdSpanFromStdArrayWarnings() { | ||
std::array<char, 124> buffer { '3', '.', '1', '4' }; | ||
(void)std::span<char> { &buffer[0], 4 }; | ||
(void)std::span<char> { buffer.data(), 4 }; | ||
} | ||
|
||
void stdSpanFromStdArrayOutOfBounds() { | ||
std::array<char, 4> buffer { '3', '.', '1', '4' }; | ||
(void)std::span<char> { &buffer[0], 5 }; // expected-warning {{std::span constructed with overflow length}} | ||
(void)std::span<char> { buffer.data(), 5 }; // expected-warning {{std::span constructed with overflow length}} | ||
} | ||
|
||
struct HexNumberBuffer { | ||
std::array<char, 16> buffer; | ||
unsigned length; | ||
|
||
const char* characters() const { return &*(buffer.end() - length); } | ||
std::span<const char> span() const { return { characters(), length }; } // expected-warning {{std::span constructed from std::array with non-constant length}} | ||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
#pragma clang system_header | ||
|
||
#include "Inputs/system-header-simulator-cxx.h" | ||
|
||
namespace std { | ||
|
||
template <class _Tp, size_t _Size> | ||
struct array { | ||
typedef array __self; | ||
typedef _Tp value_type; | ||
typedef value_type& reference; | ||
typedef const value_type& const_reference; | ||
typedef value_type* iterator; | ||
typedef const value_type* const_iterator; | ||
typedef value_type* pointer; | ||
typedef const value_type* const_pointer; | ||
typedef size_t size_type; | ||
|
||
_Tp __elems_[_Size]; | ||
|
||
reference operator[](size_type __n); | ||
const_reference operator[](size_type __n) const; | ||
|
||
value_type* data(); | ||
const value_type* data() const; | ||
|
||
size_type size() const; | ||
|
||
iterator begin(); | ||
const_iterator begin() const; | ||
iterator end(); | ||
const_iterator end() const; | ||
}; | ||
|
||
inline constexpr size_t dynamic_extent = (size_t)-1; | ||
|
||
template <typename _Tp, size_t _Extent = dynamic_extent> | ||
class span { | ||
public: | ||
using element_type = _Tp; | ||
using value_type = remove_cv_t<_Tp>; | ||
using size_type = size_t; | ||
using pointer = _Tp *; | ||
|
||
template <size_t _Sz = _Extent> requires(_Sz == 0) | ||
constexpr span(); | ||
|
||
constexpr span(const span&); | ||
|
||
template <typename _It> | ||
constexpr span(_It* __first, size_type __count); | ||
|
||
template <size_t _Sz> | ||
constexpr span(element_type (&__arr)[_Sz]); | ||
|
||
template <class _OtherElementType> | ||
constexpr span(array<_OtherElementType, _Extent>& __arr); | ||
|
||
template <class _OtherElementType> | ||
constexpr span(const array<_OtherElementType, _Extent>& __arr); | ||
|
||
constexpr pointer data() const; | ||
|
||
constexpr size_type size() const; | ||
constexpr size_type size_bytes() const; | ||
|
||
constexpr span<element_type, dynamic_extent> first(size_type __count) const; | ||
}; | ||
|
||
template<class _Tp, size_t _Sz> | ||
span(_Tp (&)[_Sz]) -> span<_Tp, _Sz>; | ||
|
||
template<class _Tp, size_t _Sz> | ||
span(array<_Tp, _Sz>&) -> span<_Tp, _Sz>; | ||
|
||
template<class _Tp, size_t _Sz> | ||
span(const array<_Tp, _Sz>&) -> span<const _Tp, _Sz>; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: we usually start function and method names with a lower-case letter.