diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index a42e3e69607771..a9ebe063c6c8bb 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -2625,13 +2625,44 @@ alpha.unix.cstring.OutOfBounds (C) """""""""""""""""""""""""""""""""" Check for out-of-bounds access in string functions; applies to:`` strncopy, strncat``. - .. code-block:: c void test() { int y = strlen((char *)&test); // warn } +.. _alpha-unix-cstring-UninitializedRead: + +alpha.unix.cstring.UninitializedRead (C) +"""""""""""""""""""""""""""""""""""""""" +Check for uninitialized reads from common memory copy/manipulation functions such as: + ``memcpy, mempcpy, memmove, memcmp, strcmp, strncmp, strcpy, strlen, strsep`` and many more. + +.. code-block:: c + + void test() { + char src[10]; + char dst[5]; + memcpy(dst,src,sizeof(dst)); // warn: Bytes string function accesses uninitialized/garbage values + } + +Limitations: + + - Due to limitations of the memory modeling in the analyzer, one can likely + observe a lot of false-positive reports like this: + .. code-block:: c + + void false_positive() { + int src[] = {1, 2, 3, 4}; + int dst[5] = {0}; + memcpy(dst, src, 4 * sizeof(int)); // false-positive: + // The 'src' buffer was correctly initialized, yet we cannot conclude + // that since the analyzer could not see a direct initialization of the + // very last byte of the source buffer. + } + + More details at the corresponding `GitHub issue `_. + .. _alpha-nondeterminism-PointerIteration: alpha.nondeterminism.PointerIteration (C++) diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td index 6b72d64950106a..9340a114ac456a 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -475,7 +475,12 @@ def CStringNotNullTerm : Checker<"NotNullTerminated">, HelpText<"Check for arguments which are not null-terminating strings">, Dependencies<[CStringModeling]>, Documentation; - + +def CStringUninitializedRead : Checker<"UninitializedRead">, + HelpText<"Checks if the string manipulation function would read uninitialized bytes">, + Dependencies<[CStringModeling]>, + Documentation; + } // end "alpha.unix.cstring" let ParentPackage = Unix in { diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 8483a1a47cea18..54c9e887b8c8c0 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -80,7 +80,7 @@ class CStringChecker : public Checker< eval::Call, check::RegionChanges > { mutable std::unique_ptr BT_Null, BT_Bounds, BT_Overlap, - BT_NotCString, BT_AdditionOverflow; + BT_NotCString, BT_AdditionOverflow, BT_UninitRead; mutable const char *CurrentFunctionDescription; @@ -92,11 +92,13 @@ class CStringChecker : public Checker< eval::Call, DefaultBool CheckCStringOutOfBounds; DefaultBool CheckCStringBufferOverlap; DefaultBool CheckCStringNotNullTerm; + DefaultBool CheckCStringUninitializedRead; CheckerNameRef CheckNameCStringNullArg; CheckerNameRef CheckNameCStringOutOfBounds; CheckerNameRef CheckNameCStringBufferOverlap; CheckerNameRef CheckNameCStringNotNullTerm; + CheckerNameRef CheckNameCStringUninitializedRead; }; CStringChecksFilter Filter; @@ -367,6 +369,15 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C, return nullptr; } + // Ensure that we wouldn't read uninitialized value. + if (Access == AccessKind::read) { + if (Filter.CheckCStringUninitializedRead && + StInBound->getSVal(ER).isUndef()) { + emitUninitializedReadBug(C, StInBound, Buffer.Expression); + return nullptr; + } + } + // Array bound check succeeded. From this point forward the array bound // should always succeed. return StInBound; @@ -578,6 +589,26 @@ void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State, } } +void CStringChecker::emitUninitializedReadBug(CheckerContext &C, + ProgramStateRef State, + const Expr *E) const { + if (ExplodedNode *N = C.generateErrorNode(State)) { + const char *Msg = + "Bytes string function accesses uninitialized/garbage values"; + if (!BT_UninitRead) + BT_UninitRead.reset( + new BuiltinBug(Filter.CheckNameCStringUninitializedRead, + "Accessing unitialized/garbage values", Msg)); + + BuiltinBug *BT = static_cast(BT_UninitRead.get()); + + auto Report = std::make_unique(*BT, Msg, N); + Report->addRange(E->getSourceRange()); + bugreporter::trackExpressionValue(N, E, *Report); + C.emitReport(std::move(Report)); + } +} + void CStringChecker::emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State, const Stmt *S, StringRef WarningMsg) const { @@ -2458,3 +2489,4 @@ REGISTER_CHECKER(CStringNullArg) REGISTER_CHECKER(CStringOutOfBounds) REGISTER_CHECKER(CStringBufferOverlap) REGISTER_CHECKER(CStringNotNullTerm) +REGISTER_CHECKER(CStringUninitializedRead) \ No newline at end of file diff --git a/clang/test/Analysis/bstring.c b/clang/test/Analysis/bstring.c index c88452e49075d4..a7c7bdb23683e7 100644 --- a/clang/test/Analysis/bstring.c +++ b/clang/test/Analysis/bstring.c @@ -2,13 +2,15 @@ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=unix.cstring \ // RUN: -analyzer-checker=alpha.unix.cstring \ +// RUN: -analyzer-disable-checker=alpha.unix.cstring.UninitializedRead \ // RUN: -analyzer-checker=debug.ExprInspection \ -// RUN: -analyzer-config eagerly-assume=false +// RUN: -analyzer-config eagerly-assume=false // // RUN: %clang_analyze_cc1 -verify %s -DUSE_BUILTINS \ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=unix.cstring \ // RUN: -analyzer-checker=alpha.unix.cstring \ +// RUN: -analyzer-disable-checker=alpha.unix.cstring.UninitializedRead \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config eagerly-assume=false // @@ -16,6 +18,7 @@ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=unix.cstring \ // RUN: -analyzer-checker=alpha.unix.cstring \ +// RUN: -analyzer-disable-checker=alpha.unix.cstring.UninitializedRead \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config eagerly-assume=false // @@ -23,6 +26,7 @@ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=unix.cstring \ // RUN: -analyzer-checker=alpha.unix.cstring \ +// RUN: -analyzer-disable-checker=alpha.unix.cstring.UninitializedRead \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config eagerly-assume=false @@ -315,7 +319,7 @@ void mempcpy15(void) { p1 = (&s2) + 1; p2 = mempcpy(&s2, &s1, sizeof(struct st)); - + clang_analyzer_eval(p1 == p2); // expected-warning{{TRUE}} } diff --git a/clang/test/Analysis/bstring_UninitRead.c b/clang/test/Analysis/bstring_UninitRead.c new file mode 100644 index 00000000000000..c535e018e62c27 --- /dev/null +++ b/clang/test/Analysis/bstring_UninitRead.c @@ -0,0 +1,59 @@ +// RUN: %clang_analyze_cc1 -verify %s \ +// RUN: -analyzer-checker=core,alpha.unix.cstring + + +// This file is generally for the alpha.unix.cstring.UninitializedRead Checker, the reason for putting it into +// the separate file because the checker is break the some existing test cases in bstring.c file , so we don't +// wanna mess up with some existing test case so it's better to create separate file for it, this file also include +// the broken test for the reference in future about the broken tests. + + +typedef typeof(sizeof(int)) size_t; + +void clang_analyzer_eval(int); + +void *memcpy(void *restrict s1, const void *restrict s2, size_t n); + +void top(char *dst) { + char buf[10]; + memcpy(dst, buf, 10); // expected-warning{{Bytes string function accesses uninitialized/garbage values}} + (void)buf; +} + +//===----------------------------------------------------------------------=== +// mempcpy() +//===----------------------------------------------------------------------=== + +void *mempcpy(void *restrict s1, const void *restrict s2, size_t n); + +void mempcpy14() { + int src[] = {1, 2, 3, 4}; + int dst[5] = {0}; + int *p; + + p = mempcpy(dst, src, 4 * sizeof(int)); // expected-warning{{Bytes string function accesses uninitialized/garbage values}} + // FIXME: This behaviour is actually surprising and needs to be fixed, + // mempcpy seems to consider the very last byte of the src buffer uninitialized + // and returning undef unfortunately. It should have returned unknown or a conjured value instead. + + clang_analyzer_eval(p == &dst[4]); // no-warning (above is fatal) +} + +struct st { + int i; + int j; +}; + + +void mempcpy15() { + struct st s1 = {0}; + struct st s2; + struct st *p1; + struct st *p2; + + p1 = (&s2) + 1; + p2 = mempcpy(&s2, &s1, sizeof(struct st)); // expected-warning{{Bytes string function accesses uninitialized/garbage values}} + // FIXME: It seems same as mempcpy14() case. + + clang_analyzer_eval(p1 == p2); // no-warning (above is fatal) +}