diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h index 153b8a732166fb..a80484610131b2 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h @@ -287,6 +287,20 @@ class RangeSet { return contains(T.getZeroValue()); } + /// Test if the range is the [0,0] range. + /// + /// Complexity: O(1) + bool encodesFalseRange() const { + const llvm::APSInt *Constant = getConcreteValue(); + return Constant && Constant->isZero(); + } + + /// Test if the range doesn't contain zero. + /// + /// Complexity: O(logN) + /// where N = size(this) + bool encodesTrueRange() const { return !containsZero(); } + void dump(raw_ostream &OS) const; void dump() const; diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index 6e4ee6e337ce54..ab7d2f1ba81a25 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -601,6 +601,10 @@ class EquivalenceClass : public llvm::FoldingSetNode { LLVM_NODISCARD static inline Optional areEqual(ProgramStateRef State, SymbolRef First, SymbolRef Second); + /// Remove one member from the class. + LLVM_NODISCARD ProgramStateRef removeMember(ProgramStateRef State, + const SymbolRef Old); + /// Iterate over all symbols and try to simplify them. LLVM_NODISCARD static inline ProgramStateRef simplify(SValBuilder &SVB, RangeSet::Factory &F, RangedConstraintManager &RCM, @@ -656,6 +660,7 @@ class EquivalenceClass : public llvm::FoldingSetNode { inline ProgramStateRef mergeImpl(RangeSet::Factory &F, ProgramStateRef State, SymbolSet Members, EquivalenceClass Other, SymbolSet OtherMembers); + static inline bool addToDisequalityInfo(DisequalityMapTy &Info, ConstraintRangeTy &Constraints, RangeSet::Factory &F, ProgramStateRef State, @@ -1749,6 +1754,19 @@ bool ConstraintAssignor::assignSymExprToConst(const SymExpr *Sym, return false; } + // We may have trivial equivalence classes in the disequality info as + // well, and we need to simplify them. + DisequalityMapTy DisequalityInfo = State->get(); + for (std::pair DisequalityEntry : + DisequalityInfo) { + EquivalenceClass Class = DisequalityEntry.first; + ClassSet DisequalClasses = DisequalityEntry.second; + State = + EquivalenceClass::simplify(Builder, RangeFactory, RCM, State, Class); + if (!State) + return false; + } + return true; } @@ -2122,6 +2140,61 @@ inline Optional EquivalenceClass::areEqual(ProgramStateRef State, return llvm::None; } +LLVM_NODISCARD ProgramStateRef +EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) { + + SymbolSet ClsMembers = getClassMembers(State); + assert(ClsMembers.contains(Old)); + + // We don't remove `Old`'s Sym->Class relation for two reasons: + // 1) This way constraints for the old symbol can still be found via it's + // equivalence class that it used to be the member of. + // 2) Performance and resource reasons. We can spare one removal and thus one + // additional tree in the forest of `ClassMap`. + + // Remove `Old`'s Class->Sym relation. + SymbolSet::Factory &F = getMembersFactory(State); + ClassMembersTy::Factory &EMFactory = State->get_context(); + ClsMembers = F.remove(ClsMembers, Old); + // Ensure another precondition of the removeMember function (we can check + // this only with isEmpty, thus we have to do the remove first). + assert(!ClsMembers.isEmpty() && + "Class should have had at least two members before member removal"); + // Overwrite the existing members assigned to this class. + ClassMembersTy ClassMembersMap = State->get(); + ClassMembersMap = EMFactory.add(ClassMembersMap, *this, ClsMembers); + State = State->set(ClassMembersMap); + + return State; +} + +// Re-evaluate an SVal with top-level `State->assume` logic. +LLVM_NODISCARD ProgramStateRef reAssume(ProgramStateRef State, + const RangeSet *Constraint, + SVal TheValue) { + if (!Constraint) + return State; + + const auto DefinedVal = TheValue.castAs(); + + // If the SVal is 0, we can simply interpret that as `false`. + if (Constraint->encodesFalseRange()) + return State->assume(DefinedVal, false); + + // If the constraint does not encode 0 then we can interpret that as `true` + // AND as a Range(Set). + if (Constraint->encodesTrueRange()) { + State = State->assume(DefinedVal, true); + if (!State) + return nullptr; + // Fall through, re-assume based on the range values as well. + } + // Overestimate the individual Ranges with the RangeSet' lowest and + // highest values. + return State->assumeInclusiveRange(DefinedVal, Constraint->getMinValue(), + Constraint->getMaxValue(), true); +} + // Iterate over all symbols and try to simplify them. Once a symbol is // simplified then we check if we can merge the simplified symbol's equivalence // class to this class. This way, we simplify not just the symbols but the @@ -2158,22 +2231,36 @@ LLVM_NODISCARD ProgramStateRef EquivalenceClass::simplify( if (OldState == State) continue; - // Initiate the reorganization of the equality information. E.g., if we - // have `c + 1 == 0` then we'd like to express that `c == -1`. It makes - // sense to do this only with `SymIntExpr`s. - // TODO Handle `IntSymExpr` as well, once computeAdjustment can handle - // them. - if (const SymIntExpr *SIE = dyn_cast(SimplifiedMemberSym)) { - if (const RangeSet *ClassConstraint = getConstraint(State, Class)) { - // Overestimate the individual Ranges with the RangeSet' lowest and - // highest values. - State = RCM.assumeSymInclusiveRange( - State, SIE, ClassConstraint->getMinValue(), - ClassConstraint->getMaxValue(), /*InRange=*/true); - if (!State) - return nullptr; - } - } + assert(find(State, MemberSym) == find(State, SimplifiedMemberSym)); + // Remove the old and more complex symbol. + State = find(State, MemberSym).removeMember(State, MemberSym); + + // Query the class constraint again b/c that may have changed during the + // merge above. + const RangeSet *ClassConstraint = getConstraint(State, Class); + + // Re-evaluate an SVal with top-level `State->assume`, this ignites + // a RECURSIVE algorithm that will reach a FIXPOINT. + // + // About performance and complexity: Let us assume that in a State we + // have N non-trivial equivalence classes and that all constraints and + // disequality info is related to non-trivial classes. In the worst case, + // we can simplify only one symbol of one class in each iteration. The + // number of symbols in one class cannot grow b/c we replace the old + // symbol with the simplified one. Also, the number of the equivalence + // classes can decrease only, b/c the algorithm does a merge operation + // optionally. We need N iterations in this case to reach the fixpoint. + // Thus, the steps needed to be done in the worst case is proportional to + // N*N. + // + // This worst case scenario can be extended to that case when we have + // trivial classes in the constraints and in the disequality map. This + // case can be reduced to the case with a State where there are only + // non-trivial classes. This is because a merge operation on two trivial + // classes results in one non-trivial class. + State = reAssume(State, ClassConstraint, SimplifiedMemberVal); + if (!State) + return nullptr; } } return State; diff --git a/clang/test/Analysis/expr-inspection-printState-eq-classes.c b/clang/test/Analysis/expr-inspection-printState-eq-classes.c index 34514335f9e809..08a1c6cbd60b44 100644 --- a/clang/test/Analysis/expr-inspection-printState-eq-classes.c +++ b/clang/test/Analysis/expr-inspection-printState-eq-classes.c @@ -15,7 +15,7 @@ void test_equivalence_classes(int a, int b, int c, int d) { return; } - // CHECK: "equivalence_classes": [ - // CHECK-NEXT: [ "((reg_$0) + (reg_$1)) != (reg_$2)", "(reg_$0) != (reg_$2)" ], - // CHECK-NEXT: [ "(reg_$0) + (reg_$1)", "reg_$0", "reg_$2", "reg_$3" ] - // CHECK-NEXT: ], +// CHECK: "equivalence_classes": [ +// CHECK-NEXT: [ "(reg_$0) != (reg_$2)" ], +// CHECK-NEXT: [ "reg_$0", "reg_$2", "reg_$3" ] +// CHECK-NEXT: ], diff --git a/clang/test/Analysis/symbol-simplification-disequality-info.cpp b/clang/test/Analysis/symbol-simplification-disequality-info.cpp new file mode 100644 index 00000000000000..69238b583eb846 --- /dev/null +++ b/clang/test/Analysis/symbol-simplification-disequality-info.cpp @@ -0,0 +1,65 @@ +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=debug.ExprInspection \ +// RUN: 2>&1 | FileCheck %s + +// In this test we check how the solver's symbol simplification mechanism +// simplifies the disequality info. + +void clang_analyzer_printState(); + +void test(int a, int b, int c, int d) { + if (a + b + c == d) + return; + clang_analyzer_printState(); + // CHECK: "disequality_info": [ + // CHECK-NEXT: { + // CHECK-NEXT: "class": [ "((reg_$0) + (reg_$1)) + (reg_$2)" ], + // CHECK-NEXT: "disequal_to": [ + // CHECK-NEXT: [ "reg_$3" ]] + // CHECK-NEXT: }, + // CHECK-NEXT: { + // CHECK-NEXT: "class": [ "reg_$3" ], + // CHECK-NEXT: "disequal_to": [ + // CHECK-NEXT: [ "((reg_$0) + (reg_$1)) + (reg_$2)" ]] + // CHECK-NEXT: } + // CHECK-NEXT: ], + + + // Simplification starts here. + if (b != 0) + return; + clang_analyzer_printState(); + // CHECK: "disequality_info": [ + // CHECK-NEXT: { + // CHECK-NEXT: "class": [ "(reg_$0) + (reg_$2)" ], + // CHECK-NEXT: "disequal_to": [ + // CHECK-NEXT: [ "reg_$3" ]] + // CHECK-NEXT: }, + // CHECK-NEXT: { + // CHECK-NEXT: "class": [ "reg_$3" ], + // CHECK-NEXT: "disequal_to": [ + // CHECK-NEXT: [ "(reg_$0) + (reg_$2)" ]] + // CHECK-NEXT: } + // CHECK-NEXT: ], + + if (c != 0) + return; + clang_analyzer_printState(); + // CHECK: "disequality_info": [ + // CHECK-NEXT: { + // CHECK-NEXT: "class": [ "reg_$0" ], + // CHECK-NEXT: "disequal_to": [ + // CHECK-NEXT: [ "reg_$3" ]] + // CHECK-NEXT: }, + // CHECK-NEXT: { + // CHECK-NEXT: "class": [ "reg_$3" ], + // CHECK-NEXT: "disequal_to": [ + // CHECK-NEXT: [ "reg_$0" ]] + // CHECK-NEXT: } + // CHECK-NEXT: ], + + // Keep the symbols and the constraints! alive. + (void)(a * b * c * d); + return; +} diff --git a/clang/test/Analysis/symbol-simplification-fixpoint-iteration-unreachable-code.cpp b/clang/test/Analysis/symbol-simplification-fixpoint-iteration-unreachable-code.cpp new file mode 100644 index 00000000000000..288cb1b332843b --- /dev/null +++ b/clang/test/Analysis/symbol-simplification-fixpoint-iteration-unreachable-code.cpp @@ -0,0 +1,55 @@ +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=debug.ExprInspection \ +// RUN: -verify + +// In this test we check whether the solver's symbol simplification mechanism +// is capable of reaching a fixpoint. + +void clang_analyzer_warnIfReached(); + +void test_contradiction(int a, int b, int c, int d, int x) { + if (a + b + c != d) + return; + if (a == d) + return; + if (b + c != 0) + return; + clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}} + + // Bring in the contradiction. + if (b != 0) + return; + + // After the simplification of `b == 0` we have: + // b == 0 + // a + c == d + // a != d + // c == 0 + // Doing another iteration we reach the fixpoint (with a contradiction): + // b == 0 + // a == d + // a != d + // c == 0 + clang_analyzer_warnIfReached(); // no-warning, i.e. UNREACHABLE + + // Enabling expensive checks would trigger an assertion failure here without + // the fixpoint iteration. + if (a + c == x) + return; + + // Keep the symbols and the constraints! alive. + (void)(a * b * c * d * x); + return; +} + +void test_true_range_contradiction(int a, unsigned b) { + if (!(b > a)) // unsigned b > int a + return; + if (a != -1) // int a == -1 + return; // Starts a simplification of `unsigned b > int a`, + // that results in `unsigned b > UINT_MAX`, + // which is always false, so the State is infeasible. + clang_analyzer_warnIfReached(); // no-warning + (void)(a * b); +} diff --git a/clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp b/clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp new file mode 100644 index 00000000000000..73922d420a8c3d --- /dev/null +++ b/clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp @@ -0,0 +1,40 @@ +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=debug.ExprInspection \ +// RUN: 2>&1 | FileCheck %s + +// In this test we check whether the solver's symbol simplification mechanism +// is capable of reaching a fixpoint. This should be done after one iteration. + +void clang_analyzer_printState(); + +void test(int a, int b, int c) { + if (a + b != c) + return; + clang_analyzer_printState(); + // CHECK: "constraints": [ + // CHECK-NEXT: { "symbol": "((reg_$0) + (reg_$1)) != (reg_$2)", "range": "{ [0, 0] }" } + // CHECK-NEXT: ], + // CHECK-NEXT: "equivalence_classes": [ + // CHECK-NEXT: [ "(reg_$0) + (reg_$1)", "reg_$2" ] + // CHECK-NEXT: ], + // CHECK-NEXT: "disequality_info": null, + + // Simplification starts here. + if (b != 0) + return; + clang_analyzer_printState(); + // CHECK: "constraints": [ + // CHECK-NEXT: { "symbol": "(reg_$0) != (reg_$2)", "range": "{ [0, 0] }" }, + // CHECK-NEXT: { "symbol": "reg_$1", "range": "{ [0, 0] }" } + // CHECK-NEXT: ], + // CHECK-NEXT: "equivalence_classes": [ + // CHECK-NEXT: [ "(reg_$0) != (reg_$2)" ], + // CHECK-NEXT: [ "reg_$0", "reg_$2" ] + // CHECK-NEXT: ], + // CHECK-NEXT: "disequality_info": null, + + // Keep the symbols and the constraints! alive. + (void)(a * b * c); + return; +} diff --git a/clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp b/clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp new file mode 100644 index 00000000000000..679ed3fda7a7a7 --- /dev/null +++ b/clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp @@ -0,0 +1,45 @@ +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=debug.ExprInspection \ +// RUN: 2>&1 | FileCheck %s + +// In this test we check whether the solver's symbol simplification mechanism +// is capable of reaching a fixpoint. This should be done after TWO iterations. + +void clang_analyzer_printState(); + +void test(int a, int b, int c, int d) { + if (a + b + c != d) + return; + if (c + b != 0) + return; + clang_analyzer_printState(); + // CHECK: "constraints": [ + // CHECK-NEXT: { "symbol": "(((reg_$0) + (reg_$1)) + (reg_$2)) != (reg_$3)", "range": "{ [0, 0] }" }, + // CHECK-NEXT: { "symbol": "(reg_$2) + (reg_$1)", "range": "{ [0, 0] }" } + // CHECK-NEXT: ], + // CHECK-NEXT: "equivalence_classes": [ + // CHECK-NEXT: [ "((reg_$0) + (reg_$1)) + (reg_$2)", "reg_$3" ] + // CHECK-NEXT: ], + // CHECK-NEXT: "disequality_info": null, + + // Simplification starts here. + if (b != 0) + return; + clang_analyzer_printState(); + // CHECK: "constraints": [ + // CHECK-NEXT: { "symbol": "(reg_$0) != (reg_$3)", "range": "{ [0, 0] }" }, + // CHECK-NEXT: { "symbol": "reg_$1", "range": "{ [0, 0] }" }, + // CHECK-NEXT: { "symbol": "reg_$2", "range": "{ [0, 0] }" } + // CHECK-NEXT: ], + // CHECK-NEXT: "equivalence_classes": [ + // CHECK-NEXT: [ "(reg_$0) != (reg_$3)" ], + // CHECK-NEXT: [ "reg_$0", "reg_$3" ], + // CHECK-NEXT: [ "reg_$2" ] + // CHECK-NEXT: ], + // CHECK-NEXT: "disequality_info": null, + + // Keep the symbols and the constraints! alive. + (void)(a * b * c * d); + return; +} diff --git a/clang/test/Analysis/symbol-simplification-reassume.cpp b/clang/test/Analysis/symbol-simplification-reassume.cpp new file mode 100644 index 00000000000000..a732170c969af3 --- /dev/null +++ b/clang/test/Analysis/symbol-simplification-reassume.cpp @@ -0,0 +1,37 @@ +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=debug.ExprInspection \ +// RUN: -analyzer-config eagerly-assume=false \ +// RUN: -verify + +// In this test we check whether the solver's symbol simplification mechanism +// is capable of re-assuming simiplified symbols. + +void clang_analyzer_eval(bool); +void clang_analyzer_warnIfReached(); + +void test_reassume_false_range(int x, int y) { + if (x + y != 0) // x + y == 0 + return; + if (x != 1) // x == 1 + return; + clang_analyzer_eval(y == -1); // expected-warning{{TRUE}} +} + +void test_reassume_true_range(int x, int y) { + if (x + y != 1) // x + y == 1 + return; + if (x != 1) // x == 1 + return; + clang_analyzer_eval(y == 0); // expected-warning{{TRUE}} +} + +void test_reassume_inclusive_range(int x, int y) { + if (x + y < 0 || x + y > 1) // x + y: [0, 1] + return; + if (x != 1) // x == 1 + return; + // y: [-1, 0] + clang_analyzer_eval(y > 0); // expected-warning{{FALSE}} + clang_analyzer_eval(y < -1);// expected-warning{{FALSE}} +}