diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 144411495f5a1..3684bccaf2d9a 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -475,7 +475,7 @@ ProgramStateRef CStringChecker::checkInit(CheckerContext &C, return nullptr; } - // We won't check whether the entire region is fully initialized -- lets just + // We won't check whether the entire region is fully initialized -- let's just // check that the first and the last element is. So, onto checking the last // element: const QualType IdxTy = SVB.getArrayIndexType(); @@ -576,8 +576,20 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C, auto [StInBound, StOutBound] = state->assumeInBoundDual(*Idx, Size); if (StOutBound && !StInBound) { + // The analyzer determined that the access is out-of-bounds, which is + // a fatal error: ideally we'd return nullptr to terminate this path + // regardless of whether the OutOfBounds checker frontend is enabled. + // However, the current out-of-bounds modeling produces too many false + // positives, so when the frontend is disabled we return the original + // (unconstrained) state and let the analysis continue. This is + // inconsistent: returning `state` instead of `StOutBound` discards the + // constraint that the index is out-of-bounds, and callers cannot + // distinguish "we proved an error" from "we couldn't determine anything" + // since both return the original state. + // TODO: Once the OutOfBounds frontend is stable, return nullptr here + // unconditionally to stop the analysis on this path. if (!OutOfBounds.isEnabled()) - return nullptr; + return state; ErrorMessage Message = createOutOfBoundErrorMsg(CurrentFunctionDescription, Access); @@ -610,10 +622,6 @@ CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State, if (!State) return nullptr; - // If out-of-bounds checking is turned off, skip the rest. - if (!OutOfBounds.isEnabled()) - return State; - SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, Buffer.Expression->getType()); @@ -661,9 +669,6 @@ ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C, SizeArgExpr Size, AnyArgExpr First, AnyArgExpr Second, CharKind CK) const { - if (!BufferOverlap.isEnabled()) - return state; - // Do a simple check for overlap: if the two arguments are from the same // buffer, see if the end of the first is greater than the start of the second // or vice versa. @@ -702,9 +707,24 @@ ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C, state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc)); if (stateTrue && !stateFalse) { - // If the values are known to be equal, that's automatically an overlap. - emitOverlapBug(C, stateTrue, First.Expression, Second.Expression); - return nullptr; + if (BufferOverlap.isEnabled()) { + // If the values are known to be equal, that's automatically an overlap. + emitOverlapBug(C, stateTrue, First.Expression, Second.Expression); + return nullptr; + } + // The analyzer proved that the two pointers are equal, which guarantees + // overlap. When BufferOverlap is disabled, we return the original state + // instead of nullptr (to avoid stopping the path) or stateTrue (which + // would encode the equality constraint). This creates an inconsistency: + // callers treat any non-null return as "no overlap found" and proceed + // with subsequent modeling (e.g. memcpy side effects), even though the + // operation has undefined behavior. Additionally, returning `state` instead + // of `stateTrue` discards the pointer-equality constraint, making the + // analysis less precise. + // FIXME: At minimum, return stateTrue to preserve the equality + // constraint. Ideally, return nullptr to stop the path unconditionally, + // since overlap is proven regardless of whether we report it. + return state; } // assume the two expressions are not equal. @@ -768,9 +788,20 @@ ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C, std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest); if (stateTrue && !stateFalse) { - // Overlap! - emitOverlapBug(C, stateTrue, First.Expression, Second.Expression); - return nullptr; + if (BufferOverlap.isEnabled()) { + emitOverlapBug(C, stateTrue, First.Expression, Second.Expression); + return nullptr; + } + // The analyzer proved that the end of the first buffer is past the start + // of the second, which means the buffers overlap. This is the same + // inconsistency as the equal-pointers case above: when BufferOverlap is + // disabled, we return the original state, so callers cannot distinguish + // "proven overlap" from "couldn't determine anything" and will proceed + // to model side effects (e.g. memcpy) on a path with proven UB. + // Returning `stateTrue` would at least preserve the overlap constraint; + // returning nullptr would correctly terminate the path. + // FIXME: Return nullptr unconditionally once BufferOverlap is stable. + return state; } // assume the two expressions don't overlap. @@ -779,7 +810,10 @@ ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C, } void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state, - const Stmt *First, const Stmt *Second) const { + const Stmt *First, + const Stmt *Second) const { + assert(BufferOverlap.isEnabled() && + "Can't emit from a checker that is not enabled!"); ExplodedNode *N = C.generateErrorNode(state); if (!N) return; @@ -795,6 +829,8 @@ void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state, void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S, StringRef WarningMsg) const { + assert(NullArg.isEnabled() && + "Can't emit from a checker that is not enabled!"); if (ExplodedNode *N = C.generateErrorNode(State)) { auto Report = std::make_unique(NullArg, WarningMsg, N); @@ -809,6 +845,8 @@ void CStringChecker::emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State, const Expr *E, const MemRegion *R, StringRef Msg) const { + assert(UninitializedRead.isEnabled() && + "Can't emit from a checker that is not enabled!"); if (ExplodedNode *N = C.generateErrorNode(State)) { auto Report = std::make_unique(UninitializedRead, Msg, N); @@ -824,6 +862,8 @@ void CStringChecker::emitUninitializedReadBug(CheckerContext &C, void CStringChecker::emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State, const Stmt *S, StringRef WarningMsg) const { + assert(OutOfBounds.isEnabled() && + "Can't emit from a checker that is not enabled!"); if (ExplodedNode *N = C.generateErrorNode(State)) { // FIXME: It would be nice to eventually make this diagnostic more clear, // e.g., by referencing the original declaration or by saying *why* this @@ -838,6 +878,8 @@ void CStringChecker::emitOutOfBoundsBug(CheckerContext &C, void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State, const Stmt *S, StringRef WarningMsg) const { + assert(NotNullTerm.isEnabled() && + "Can't emit from a checker that is not enabled!"); if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) { auto Report = std::make_unique(NotNullTerm, WarningMsg, N); @@ -848,13 +890,9 @@ void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State, } ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C, - ProgramStateRef state, - NonLoc left, - NonLoc right) const { - // If out-of-bounds checking is turned off, skip the rest. - if (!OutOfBounds.isEnabled()) - return state; - + ProgramStateRef state, + NonLoc left, + NonLoc right) const { // If a previous check has failed, propagate the failure. if (!state) return nullptr; diff --git a/clang/test/Analysis/bstring.cpp b/clang/test/Analysis/bstring.cpp index 9c30bef15d407..93cfc871296e0 100644 --- a/clang/test/Analysis/bstring.cpp +++ b/clang/test/Analysis/bstring.cpp @@ -1,8 +1,30 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.cstring,unix.Malloc,alpha.unix.cstring,debug.ExprInspection -verify -analyzer-config eagerly-assume=false %s -// RUN: %clang_analyze_cc1 -DUSE_BUILTINS -analyzer-checker=core,unix.cstring,unix.Malloc,alpha.unix.cstring,debug.ExprInspection -verify -analyzer-config eagerly-assume=false %s -// RUN: %clang_analyze_cc1 -DVARIANT -analyzer-checker=core,unix.cstring,alpha.unix.cstring,unix.Malloc,debug.ExprInspection -verify -analyzer-config eagerly-assume=false %s -// RUN: %clang_analyze_cc1 -DUSE_BUILTINS -DVARIANT -analyzer-checker=core,unix.cstring,alpha.unix.cstring,unix.Malloc,debug.ExprInspection -verify -analyzer-config eagerly-assume=false %s -// RUN: %clang_analyze_cc1 -DSUPPRESS_OUT_OF_BOUND -analyzer-checker=core,unix.cstring,unix.Malloc,alpha.unix.cstring.BufferOverlap,unix.cstring.NotNullTerminated,debug.ExprInspection -verify -analyzer-config eagerly-assume=false %s +// DEFINE: %{analyzer} = %clang_analyze_cc1 \ +// DEFINE: -analyzer-checker=core \ +// DEFINE: -analyzer-checker=unix.cstring \ +// DEFINE: -analyzer-checker=unix.Malloc \ +// DEFINE: -analyzer-checker=debug.ExprInspection \ +// DEFINE: -analyzer-config eagerly-assume=false \ +// DEFINE: -verify %s + +// RUN: %{analyzer} \ +// RUN: -analyzer-checker=alpha.unix.cstring + +// RUN: %{analyzer} -DUSE_BUILTINS \ +// RUN: -analyzer-checker=alpha.unix.cstring + +// RUN: %{analyzer} -DVARIANT \ +// RUN: -analyzer-checker=alpha.unix.cstring + +// RUN: %{analyzer} -DUSE_BUILTINS -DVARIANT \ +// RUN: -analyzer-checker=alpha.unix.cstring + +// RUN: %{analyzer} -DSUPPRESS_OUT_OF_BOUND \ +// RUN: -analyzer-checker=alpha.unix.cstring.BufferOverlap \ +// RUN: -analyzer-checker=unix.cstring.NotNullTerminated + +// RUN: %{analyzer} \ +// RUN: -DUNINIT_WITHOUT_OUTOFBOUND \ +// RUN: -analyzer-checker=alpha.unix.cstring.UninitializedRead #include "Inputs/system-header-simulator-cxx.h" #include "Inputs/system-header-simulator-for-malloc.h" @@ -103,6 +125,11 @@ void memset1_inheritance() { #ifdef SUPPRESS_OUT_OF_BOUND void memset2_inheritance_field() { Derived d; + // FIXME: This example wrongly calls `memset` on the derived field, with the + // size parameter that has the size of the whole derived class. The analysis + // should stop at that point as this is UB. + // This test asserts the current behavior of treating the not set part as + // UNKNOWN. memset(&d.d_mem, 0, sizeof(Derived)); clang_analyzer_eval(d.b_mem == 0); // expected-warning{{UNKNOWN}} clang_analyzer_eval(d.d_mem == 0); // expected-warning{{UNKNOWN}} @@ -110,6 +137,12 @@ void memset2_inheritance_field() { void memset3_inheritance_field() { Derived d; + // FIXME: Here we are setting the field of the base with the size of the + // Derived class. By the letter of the standard this is UB, but practically + // this only touches memory it is supposed to with the above class + // definitions. If we were to be strict the analysis should stop here. + // This test asserts the current behavior of nevertheless treating the + // wrongly set field as correctly set to 0. memset(&d.b_mem, 0, sizeof(Derived)); clang_analyzer_eval(d.b_mem == 0); // expected-warning{{TRUE}} clang_analyzer_eval(d.d_mem == 0); // expected-warning{{TRUE}} @@ -176,6 +209,12 @@ class DerivedVirtual : public BaseVirtual { #ifdef SUPPRESS_OUT_OF_BOUND void memset8_virtual_inheritance_field() { DerivedVirtual d; + // FIXME: This example wrongly calls `memset` on the derived field, with the + // size parameter that has the size of the whole derived class. The analysis + // should stop at that point as this is UB. The situation is further + // complicated by the fact the base base a virtual function. + // This test asserts the current behavior of treating the not set part as + // UNKNOWN. memset(&d.b_mem, 0, sizeof(Derived)); clang_analyzer_eval(d.b_mem == 0); // expected-warning{{UNKNOWN}} clang_analyzer_eval(d.d_mem == 0); // expected-warning{{UNKNOWN}} @@ -188,8 +227,21 @@ void memset1_new_array() { int *array = new int[10]; memset(array, 0, 10 * sizeof(int)); clang_analyzer_eval(array[2] == 0); // expected-warning{{TRUE}} + // FIXME: The analyzer should stop analysis after memset. Maybe the intent of + // this test was to test for this as a desired behaviour, but it shouldn't be, + // going out-of-bounds with memset is a fatal error, even if we decide not to + // report it. memset(array + 1, 'a', 10 * sizeof(9)); clang_analyzer_eval(array[2] == 0); // expected-warning{{UNKNOWN}} delete[] array; } #endif + +#ifdef UNINIT_WITHOUT_OUTOFBOUND +void memmove_uninit_without_outofbound() { + int src[4]; + int dst[4]; + memmove(dst, src, sizeof(src)); // expected-warning{{The first element of the 2nd argument is undefined}} + // expected-note@-1{{Other elements might also be undefined}} +} +#endif diff --git a/clang/test/Analysis/malloc.c b/clang/test/Analysis/malloc.c index 92b47bc3b5e9a..849ab3a3a0f37 100644 --- a/clang/test/Analysis/malloc.c +++ b/clang/test/Analysis/malloc.c @@ -870,12 +870,23 @@ void doNotInvalidateWhenPassedToSystemCalls(char *s) { strlen(p); strcpy(p, s); strcpy(s, p); + // FIXME: We should stop analysis here, even if we emit no warnings, since + // overlapping buffers for strycpy is a fatal error. strcpy(p, p); memcpy(p, s, 1); memcpy(s, p, 1); memcpy(p, p, 1); } // expected-warning {{leak}} +void overlappingMemcpyDoesNotSinkPath(char *s) { + char *p = malloc(12); + // FIXME: We should stop analysis here, even if we emit no warnings, since + // overlapping buffers for strycpy is a fatal error. + int a[4] = {0}; + memcpy(a+2, a+1, 8); + (void)p; +} // expected-warning {{leak}} + // Treat source buffer contents as escaped. void escapeSourceContents(char *s) { char *p = malloc(12);