Skip to content

Commit 1bd2d33

Browse files
committed
[analyzer][CStringChecker] Adjust the invalidation operation on the super region of the destination buffer during string copy
Fixes GitHub issue #55019, following the previous fix https://reviews.llvm.org/D12571 for issue #23328. Both issues report false memory-leak warnings after calling string-copy APIs with a buffer field of an object as the destination. The buffer invalidation incorrectly drops the assignment to a heap memory block even when no overflow happens, where the pointer of the dropped assignment is declared in the same object as the destination buffer. The previous fix only considers the `memcpy` functions, whose copy length is available from the arguments. In this issue, the copy length is inferable from the buffer declaration and the string literals being copied. Therefore, I have adjusted the previous fix to reuse the copy length computed earlier. Besides, for APIs that never overflow (strsep), or for which we never know whether they can overflow (std::copy), new invalidation operations have been introduced to inform CStringChecker::InvalidateBuffer whether or not to invalidate the super region that encompasses the destination buffer. Reviewed By: steakhal. Differential Revision: https://reviews.llvm.org/D152435
1 parent 280d163 commit 1bd2d33

File tree

4 files changed

+231
-75
lines changed

4 files changed

+231
-75
lines changed

clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp

Lines changed: 137 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -260,11 +260,34 @@ class CStringChecker : public Checker< eval::Call,
260260
const Expr *expr,
261261
SVal val) const;
262262

263-
static ProgramStateRef InvalidateBuffer(CheckerContext &C,
264-
ProgramStateRef state,
265-
const Expr *Ex, SVal V,
266-
bool IsSourceBuffer,
267-
const Expr *Size);
263+
/// Invalidate the destination buffer determined by characters copied.
264+
static ProgramStateRef
265+
invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S,
266+
const Expr *BufE, SVal BufV, SVal SizeV,
267+
QualType SizeTy);
268+
269+
/// Operation never overflows, do not invalidate the super region.
270+
static ProgramStateRef invalidateDestinationBufferNeverOverflows(
271+
CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV);
272+
273+
/// We do not know whether the operation can overflow (e.g. size is unknown),
274+
/// invalidate the super region and escape related pointers.
275+
static ProgramStateRef invalidateDestinationBufferAlwaysEscapeSuperRegion(
276+
CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV);
277+
278+
/// Invalidate the source buffer for escaping pointers.
279+
static ProgramStateRef invalidateSourceBuffer(CheckerContext &C,
280+
ProgramStateRef S,
281+
const Expr *BufE, SVal BufV);
282+
283+
/// @param InvalidationTraitOperations Determine how to invalidate the
284+
/// MemRegion by setting the invalidation traits. Return true to cause pointer
285+
/// escape, or false otherwise.
286+
static ProgramStateRef invalidateBufferAux(
287+
CheckerContext &C, ProgramStateRef State, const Expr *Ex, SVal V,
288+
llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
289+
const MemRegion *)>
290+
InvalidationTraitOperations);
268291

269292
static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
270293
const MemRegion *MR);
@@ -310,10 +333,9 @@ class CStringChecker : public Checker< eval::Call,
310333
// Return true if the destination buffer of the copy function may be in bound.
311334
// Expects SVal of Size to be positive and unsigned.
312335
// Expects SVal of FirstBuf to be a FieldRegion.
313-
static bool IsFirstBufInBound(CheckerContext &C,
314-
ProgramStateRef state,
315-
const Expr *FirstBuf,
316-
const Expr *Size);
336+
static bool isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
337+
SVal BufVal, QualType BufTy, SVal LengthVal,
338+
QualType LengthTy);
317339
};
318340

319341
} //end anonymous namespace
@@ -967,43 +989,40 @@ const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
967989
return strRegion->getStringLiteral();
968990
}
969991

970-
bool CStringChecker::IsFirstBufInBound(CheckerContext &C,
971-
ProgramStateRef state,
972-
const Expr *FirstBuf,
973-
const Expr *Size) {
992+
bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
993+
SVal BufVal, QualType BufTy,
994+
SVal LengthVal, QualType LengthTy) {
974995
// If we do not know that the buffer is long enough we return 'true'.
975996
// Otherwise the parent region of this field region would also get
976997
// invalidated, which would lead to warnings based on an unknown state.
977998

999+
if (LengthVal.isUnknown())
1000+
return false;
1001+
9781002
// Originally copied from CheckBufferAccess and CheckLocation.
979-
SValBuilder &svalBuilder = C.getSValBuilder();
980-
ASTContext &Ctx = svalBuilder.getContext();
981-
const LocationContext *LCtx = C.getLocationContext();
1003+
SValBuilder &SB = C.getSValBuilder();
1004+
ASTContext &Ctx = C.getASTContext();
9821005

983-
QualType sizeTy = Size->getType();
9841006
QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
985-
SVal BufVal = state->getSVal(FirstBuf, LCtx);
9861007

987-
SVal LengthVal = state->getSVal(Size, LCtx);
9881008
std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
9891009
if (!Length)
9901010
return true; // cf top comment.
9911011

9921012
// Compute the offset of the last element to be accessed: size-1.
993-
NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
994-
SVal Offset = svalBuilder.evalBinOpNN(state, BO_Sub, *Length, One, sizeTy);
1013+
NonLoc One = SB.makeIntVal(1, LengthTy).castAs<NonLoc>();
1014+
SVal Offset = SB.evalBinOpNN(State, BO_Sub, *Length, One, LengthTy);
9951015
if (Offset.isUnknown())
9961016
return true; // cf top comment
9971017
NonLoc LastOffset = Offset.castAs<NonLoc>();
9981018

9991019
// Check that the first buffer is sufficiently long.
1000-
SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
1020+
SVal BufStart = SB.evalCast(BufVal, PtrTy, BufTy);
10011021
std::optional<Loc> BufLoc = BufStart.getAs<Loc>();
10021022
if (!BufLoc)
10031023
return true; // cf top comment.
10041024

1005-
SVal BufEnd =
1006-
svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, LastOffset, PtrTy);
1025+
SVal BufEnd = SB.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
10071026

10081027
// Check for out of bound array element access.
10091028
const MemRegion *R = BufEnd.getAsRegion();
@@ -1017,28 +1036,90 @@ bool CStringChecker::IsFirstBufInBound(CheckerContext &C,
10171036
// FIXME: Does this crash when a non-standard definition
10181037
// of a library function is encountered?
10191038
assert(ER->getValueType() == C.getASTContext().CharTy &&
1020-
"IsFirstBufInBound should only be called with char* ElementRegions");
1039+
"isFirstBufInBound should only be called with char* ElementRegions");
10211040

10221041
// Get the size of the array.
10231042
const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
1024-
DefinedOrUnknownSVal SizeDV = getDynamicExtent(state, superReg, svalBuilder);
1043+
DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, superReg, SB);
10251044

10261045
// Get the index of the accessed element.
10271046
DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
10281047

1029-
ProgramStateRef StInBound = state->assumeInBound(Idx, SizeDV, true);
1048+
ProgramStateRef StInBound = State->assumeInBound(Idx, SizeDV, true);
10301049

10311050
return static_cast<bool>(StInBound);
10321051
}
10331052

1034-
ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
1035-
ProgramStateRef state,
1036-
const Expr *E, SVal V,
1037-
bool IsSourceBuffer,
1038-
const Expr *Size) {
1053+
ProgramStateRef CStringChecker::invalidateDestinationBufferBySize(
1054+
CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV,
1055+
SVal SizeV, QualType SizeTy) {
1056+
auto InvalidationTraitOperations =
1057+
[&C, S, BufTy = BufE->getType(), BufV, SizeV,
1058+
SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1059+
// If destination buffer is a field region and access is in bound, do
1060+
// not invalidate its super region.
1061+
if (MemRegion::FieldRegionKind == R->getKind() &&
1062+
isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) {
1063+
ITraits.setTrait(
1064+
R,
1065+
RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
1066+
}
1067+
return false;
1068+
};
1069+
1070+
return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1071+
}
1072+
1073+
ProgramStateRef
1074+
CStringChecker::invalidateDestinationBufferAlwaysEscapeSuperRegion(
1075+
CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) {
1076+
auto InvalidationTraitOperations = [](RegionAndSymbolInvalidationTraits &,
1077+
const MemRegion *R) {
1078+
return isa<FieldRegion>(R);
1079+
};
1080+
1081+
return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1082+
}
1083+
1084+
ProgramStateRef CStringChecker::invalidateDestinationBufferNeverOverflows(
1085+
CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) {
1086+
auto InvalidationTraitOperations =
1087+
[](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1088+
if (MemRegion::FieldRegionKind == R->getKind())
1089+
ITraits.setTrait(
1090+
R,
1091+
RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
1092+
return false;
1093+
};
1094+
1095+
return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1096+
}
1097+
1098+
ProgramStateRef CStringChecker::invalidateSourceBuffer(CheckerContext &C,
1099+
ProgramStateRef S,
1100+
const Expr *BufE,
1101+
SVal BufV) {
1102+
auto InvalidationTraitOperations =
1103+
[](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1104+
ITraits.setTrait(
1105+
R->getBaseRegion(),
1106+
RegionAndSymbolInvalidationTraits::TK_PreserveContents);
1107+
ITraits.setTrait(R,
1108+
RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
1109+
return true;
1110+
};
1111+
1112+
return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1113+
}
1114+
1115+
ProgramStateRef CStringChecker::invalidateBufferAux(
1116+
CheckerContext &C, ProgramStateRef State, const Expr *E, SVal V,
1117+
llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
1118+
const MemRegion *)>
1119+
InvalidationTraitOperations) {
10391120
std::optional<Loc> L = V.getAs<Loc>();
10401121
if (!L)
1041-
return state;
1122+
return State;
10421123

10431124
// FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
10441125
// some assumptions about the value that CFRefCount can't. Even so, it should
@@ -1055,37 +1136,18 @@ ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
10551136

10561137
// Invalidate this region.
10571138
const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
1058-
1059-
bool CausesPointerEscape = false;
10601139
RegionAndSymbolInvalidationTraits ITraits;
1061-
// Invalidate and escape only indirect regions accessible through the source
1062-
// buffer.
1063-
if (IsSourceBuffer) {
1064-
ITraits.setTrait(R->getBaseRegion(),
1065-
RegionAndSymbolInvalidationTraits::TK_PreserveContents);
1066-
ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
1067-
CausesPointerEscape = true;
1068-
} else {
1069-
const MemRegion::Kind& K = R->getKind();
1070-
if (K == MemRegion::FieldRegionKind)
1071-
if (Size && IsFirstBufInBound(C, state, E, Size)) {
1072-
// If destination buffer is a field region and access is in bound,
1073-
// do not invalidate its super region.
1074-
ITraits.setTrait(
1075-
R,
1076-
RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
1077-
}
1078-
}
1140+
bool CausesPointerEscape = InvalidationTraitOperations(ITraits, R);
10791141

1080-
return state->invalidateRegions(R, E, C.blockCount(), LCtx,
1142+
return State->invalidateRegions(R, E, C.blockCount(), LCtx,
10811143
CausesPointerEscape, nullptr, nullptr,
10821144
&ITraits);
10831145
}
10841146

10851147
// If we have a non-region value by chance, just remove the binding.
10861148
// FIXME: is this necessary or correct? This handles the non-Region
10871149
// cases. Is it ever valid to store to these?
1088-
return state->killBinding(*L);
1150+
return State->killBinding(*L);
10891151
}
10901152

10911153
bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
@@ -1182,8 +1244,8 @@ bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal,
11821244
} else {
11831245
// If the destination buffer's extent is not equal to the value of
11841246
// third argument, just invalidate buffer.
1185-
State = InvalidateBuffer(C, State, DstBuffer, MemVal,
1186-
/*IsSourceBuffer*/ false, Size);
1247+
State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal,
1248+
SizeVal, Size->getType());
11871249
}
11881250

11891251
if (StateNullChar && !StateNonNullChar) {
@@ -1208,8 +1270,8 @@ bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal,
12081270
} else {
12091271
// If the offset is not zero and char value is not concrete, we can do
12101272
// nothing but invalidate the buffer.
1211-
State = InvalidateBuffer(C, State, DstBuffer, MemVal,
1212-
/*IsSourceBuffer*/ false, Size);
1273+
State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal,
1274+
SizeVal, Size->getType());
12131275
}
12141276
return true;
12151277
}
@@ -1305,15 +1367,14 @@ void CStringChecker::evalCopyCommon(CheckerContext &C, const CallExpr *CE,
13051367
// can use LazyCompoundVals to copy the source values into the destination.
13061368
// This would probably remove any existing bindings past the end of the
13071369
// copied region, but that's still an improvement over blank invalidation.
1308-
state =
1309-
InvalidateBuffer(C, state, Dest.Expression, C.getSVal(Dest.Expression),
1310-
/*IsSourceBuffer*/ false, Size.Expression);
1370+
state = invalidateDestinationBufferBySize(
1371+
C, state, Dest.Expression, C.getSVal(Dest.Expression), sizeVal,
1372+
Size.Expression->getType());
13111373

13121374
// Invalidate the source (const-invalidation without const-pointer-escaping
13131375
// the address of the top-level region).
1314-
state = InvalidateBuffer(C, state, Source.Expression,
1315-
C.getSVal(Source.Expression),
1316-
/*IsSourceBuffer*/ true, nullptr);
1376+
state = invalidateSourceBuffer(C, state, Source.Expression,
1377+
C.getSVal(Source.Expression));
13171378

13181379
C.addTransition(state);
13191380
}
@@ -1985,13 +2046,13 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
19852046
// can use LazyCompoundVals to copy the source values into the destination.
19862047
// This would probably remove any existing bindings past the end of the
19872048
// string, but that's still an improvement over blank invalidation.
1988-
state = InvalidateBuffer(C, state, Dst.Expression, *dstRegVal,
1989-
/*IsSourceBuffer*/ false, nullptr);
2049+
state = invalidateDestinationBufferBySize(C, state, Dst.Expression,
2050+
*dstRegVal, amountCopied,
2051+
C.getASTContext().getSizeType());
19902052

19912053
// Invalidate the source (const-invalidation without const-pointer-escaping
19922054
// the address of the top-level region).
1993-
state = InvalidateBuffer(C, state, srcExpr.Expression, srcVal,
1994-
/*IsSourceBuffer*/ true, nullptr);
2055+
state = invalidateSourceBuffer(C, state, srcExpr.Expression, srcVal);
19952056

19962057
// Set the C string length of the destination, if we know it.
19972058
if (IsBounded && (appendK == ConcatFnKind::none)) {
@@ -2206,8 +2267,9 @@ void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
22062267

22072268
// Invalidate the search string, representing the change of one delimiter
22082269
// character to NUL.
2209-
State = InvalidateBuffer(C, State, SearchStrPtr.Expression, Result,
2210-
/*IsSourceBuffer*/ false, nullptr);
2270+
// As the replacement never overflows, do not invalidate its super region.
2271+
State = invalidateDestinationBufferNeverOverflows(
2272+
C, State, SearchStrPtr.Expression, Result);
22112273

22122274
// Overwrite the search string pointer. The new value is either an address
22132275
// further along in the same string, or NULL if there are no more tokens.
@@ -2256,8 +2318,10 @@ void CStringChecker::evalStdCopyCommon(CheckerContext &C,
22562318
// Invalidate the destination buffer
22572319
const Expr *Dst = CE->getArg(2);
22582320
SVal DstVal = State->getSVal(Dst, LCtx);
2259-
State = InvalidateBuffer(C, State, Dst, DstVal, /*IsSource=*/false,
2260-
/*Size=*/nullptr);
2321+
// FIXME: As we do not know how many items are copied, we also invalidate the
2322+
// super region containing the target location.
2323+
State =
2324+
invalidateDestinationBufferAlwaysEscapeSuperRegion(C, State, Dst, DstVal);
22612325

22622326
SValBuilder &SVB = C.getSValBuilder();
22632327

clang/test/Analysis/Inputs/system-header-simulator.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,9 @@ size_t strlen(const char *);
6363

6464
char *strcpy(char *restrict, const char *restrict);
6565
char *strncpy(char *dst, const char *src, size_t n);
66+
char *strsep(char **stringp, const char *delim);
6667
void *memcpy(void *dst, const void *src, size_t n);
68+
void *memset(void *s, int c, size_t n);
6769

6870
typedef unsigned long __darwin_pthread_key_t;
6971
typedef __darwin_pthread_key_t pthread_key_t;

0 commit comments

Comments
 (0)