diff --git a/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h b/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h index 405e93287a05d..9a0a00f3c0134 100644 --- a/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h +++ b/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h @@ -58,19 +58,36 @@ class ControlFlowContext { return BlockReachable[B.getBlockID()]; } + /// Returns whether `B` contains an expression that is consumed in a + /// different block than `B` (i.e. the parent of the expression is in a + /// different block). + /// This happens if there is control flow within a full-expression (triggered + /// by `&&`, `||`, or the conditional operator). Note that the operands of + /// these operators are not the only expressions that can be consumed in a + /// different block. For example, in the function call + /// `f(&i, cond() ? 1 : 0)`, `&i` is in a different block than the `CallExpr`. + bool containsExprConsumedInDifferentBlock(const CFGBlock &B) const { + return ContainsExprConsumedInDifferentBlock.contains(&B); + } + private: - ControlFlowContext(const Decl &D, std::unique_ptr Cfg, - llvm::DenseMap StmtToBlock, - llvm::BitVector BlockReachable) + ControlFlowContext( + const Decl &D, std::unique_ptr Cfg, + llvm::DenseMap StmtToBlock, + llvm::BitVector BlockReachable, + llvm::DenseSet ContainsExprConsumedInDifferentBlock) : ContainingDecl(D), Cfg(std::move(Cfg)), StmtToBlock(std::move(StmtToBlock)), - BlockReachable(std::move(BlockReachable)) {} + BlockReachable(std::move(BlockReachable)), + ContainsExprConsumedInDifferentBlock( + std::move(ContainsExprConsumedInDifferentBlock)) {} /// The `Decl` containing the statement used to construct the CFG. const Decl &ContainingDecl; std::unique_ptr Cfg; llvm::DenseMap StmtToBlock; llvm::BitVector BlockReachable; + llvm::DenseSet ContainsExprConsumedInDifferentBlock; }; } // namespace dataflow diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h index 62e7af7ac219b..e8f009ef6c791 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -210,6 +210,14 @@ class Environment { bool equivalentTo(const Environment &Other, Environment::ValueModel &Model) const; + /// How to treat expression state (`ExprToLoc` and `ExprToVal`) in a join. + /// If the join happens within a full expression, expression state should be + /// kept; otherwise, we can discard it. + enum ExprJoinBehavior { + DiscardExprState, + KeepExprState, + }; + /// Joins two environments by taking the intersection of storage locations and /// values that are stored in them. Distinct values that are assigned to the /// same storage locations in `EnvA` and `EnvB` are merged using `Model`. @@ -218,7 +226,8 @@ class Environment { /// /// `EnvA` and `EnvB` must use the same `DataflowAnalysisContext`. static Environment join(const Environment &EnvA, const Environment &EnvB, - Environment::ValueModel &Model); + Environment::ValueModel &Model, + ExprJoinBehavior ExprBehavior); /// Widens the environment point-wise, using `PrevEnv` as needed to inform the /// approximation. diff --git a/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp b/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp index 8aed19544be6a..7c9f8fbb0a700 100644 --- a/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp +++ b/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp @@ -94,6 +94,38 @@ static llvm::BitVector findReachableBlocks(const CFG &Cfg) { return BlockReachable; } +static llvm::DenseSet +buildContainsExprConsumedInDifferentBlock( + const CFG &Cfg, + const llvm::DenseMap &StmtToBlock) { + llvm::DenseSet Result; + + auto CheckChildExprs = [&Result, &StmtToBlock](const Stmt *S, + const CFGBlock *Block) { + for (const Stmt *Child : S->children()) { + if (!isa(Child)) + continue; + const CFGBlock *ChildBlock = StmtToBlock.lookup(Child); + if (ChildBlock != Block) + Result.insert(ChildBlock); + } + }; + + for (const CFGBlock *Block : Cfg) { + if (Block == nullptr) + continue; + + for (const CFGElement &Element : *Block) + if (auto S = Element.getAs()) + CheckChildExprs(S->getStmt(), Block); + + if (const Stmt *TerminatorCond = Block->getTerminatorCondition()) + CheckChildExprs(TerminatorCond, Block); + } + + return Result; +} + llvm::Expected ControlFlowContext::build(const FunctionDecl &Func) { if (!Func.doesThisDeclarationHaveABody()) @@ -140,8 +172,12 @@ ControlFlowContext::build(const Decl &D, Stmt &S, ASTContext &C) { llvm::BitVector BlockReachable = findReachableBlocks(*Cfg); + llvm::DenseSet ContainsExprConsumedInDifferentBlock = + buildContainsExprConsumedInDifferentBlock(*Cfg, StmtToBlock); + return ControlFlowContext(D, std::move(Cfg), std::move(StmtToBlock), - std::move(BlockReachable)); + std::move(BlockReachable), + std::move(ContainsExprConsumedInDifferentBlock)); } } // namespace dataflow diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp index fd7b06efcc786..62332a18c44a4 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -48,6 +48,24 @@ static llvm::DenseMap intersectDeclToLoc( return Result; } +// Performs a join on either `ExprToLoc` or `ExprToVal`. +// The maps must be consistent in the sense that any entries for the same +// expression must map to the same location / value. This is the case if we are +// performing a join for control flow within a full-expression (which is the +// only case when this function should be used). +template MapT joinExprMaps(const MapT &Map1, const MapT &Map2) { + MapT Result = Map1; + + for (const auto &Entry : Map2) { + [[maybe_unused]] auto [It, Inserted] = Result.insert(Entry); + // If there was an existing entry, its value should be the same as for the + // entry we were trying to insert. + assert(It->second == Entry.second); + } + + return Result; +} + // Whether to consider equivalent two values with an unknown relation. // // FIXME: this function is a hack enabling unsoundness to support @@ -627,7 +645,8 @@ LatticeJoinEffect Environment::widen(const Environment &PrevEnv, } Environment Environment::join(const Environment &EnvA, const Environment &EnvB, - Environment::ValueModel &Model) { + Environment::ValueModel &Model, + ExprJoinBehavior ExprBehavior) { assert(EnvA.DACtx == EnvB.DACtx); assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc); assert(EnvA.CallStack == EnvB.CallStack); @@ -675,9 +694,10 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB, JoinedEnv.LocToVal = joinLocToVal(EnvA.LocToVal, EnvB.LocToVal, EnvA, EnvB, JoinedEnv, Model); - // We intentionally leave `JoinedEnv.ExprToLoc` and `JoinedEnv.ExprToVal` - // empty, as we never need to access entries in these maps outside of the - // basic block that sets them. + if (ExprBehavior == KeepExprState) { + JoinedEnv.ExprToVal = joinExprMaps(EnvA.ExprToVal, EnvB.ExprToVal); + JoinedEnv.ExprToLoc = joinExprMaps(EnvA.ExprToLoc, EnvB.ExprToLoc); + } return JoinedEnv; } diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp index 4c88c46142d64..a9f39e153d0ce 100644 --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -221,6 +221,7 @@ class PrettyStackTraceCFGElement : public llvm::PrettyStackTraceEntry { // Avoids unneccesary copies of the environment. class JoinedStateBuilder { AnalysisContext &AC; + Environment::ExprJoinBehavior JoinBehavior; std::vector All; std::deque Owned; @@ -228,11 +229,13 @@ class JoinedStateBuilder { join(const TypeErasedDataflowAnalysisState &L, const TypeErasedDataflowAnalysisState &R) { return {AC.Analysis.joinTypeErased(L.Lattice, R.Lattice), - Environment::join(L.Env, R.Env, AC.Analysis)}; + Environment::join(L.Env, R.Env, AC.Analysis, JoinBehavior)}; } public: - JoinedStateBuilder(AnalysisContext &AC) : AC(AC) {} + JoinedStateBuilder(AnalysisContext &AC, + Environment::ExprJoinBehavior JoinBehavior) + : AC(AC), JoinBehavior(JoinBehavior) {} void addOwned(TypeErasedDataflowAnalysisState State) { Owned.push_back(std::move(State)); @@ -248,12 +251,12 @@ class JoinedStateBuilder { // initialize the state of each basic block differently. return {AC.Analysis.typeErasedInitialElement(), AC.InitEnv.fork()}; if (All.size() == 1) - // Join the environment with itself so that we discard the entries from - // `ExprToLoc` and `ExprToVal`. + // Join the environment with itself so that we discard expression state if + // desired. // FIXME: We could consider writing special-case code for this that only // does the discarding, but it's not clear if this is worth it. - return {All[0]->Lattice, - Environment::join(All[0]->Env, All[0]->Env, AC.Analysis)}; + return {All[0]->Lattice, Environment::join(All[0]->Env, All[0]->Env, + AC.Analysis, JoinBehavior)}; auto Result = join(*All[0], *All[1]); for (unsigned I = 2; I < All.size(); ++I) @@ -307,7 +310,22 @@ computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) { } } - JoinedStateBuilder Builder(AC); + // If any of the predecessor blocks contains an expression consumed in a + // different block, we need to keep expression state. + // Note that in this case, we keep expression state for all predecessors, + // rather than only those predecessors that actually contain an expression + // consumed in a different block. While this is potentially suboptimal, it's + // actually likely, if we have control flow within a full expression, that + // all predecessors have expression state consumed in a different block. + Environment::ExprJoinBehavior JoinBehavior = Environment::DiscardExprState; + for (const CFGBlock *Pred : Preds) { + if (Pred && AC.CFCtx.containsExprConsumedInDifferentBlock(*Pred)) { + JoinBehavior = Environment::KeepExprState; + break; + } + } + + JoinedStateBuilder Builder(AC, JoinBehavior); for (const CFGBlock *Pred : Preds) { // Skip if the `Block` is unreachable or control flow cannot get past it. if (!Pred || Pred->hasNoReturnElement()) diff --git a/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp b/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp index 8799d03dfd3c5..465a8e21690c4 100644 --- a/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp @@ -190,7 +190,8 @@ TEST_F(EnvironmentTest, JoinRecords) { Env2.setValue(Loc, Val2); Environment::ValueModel Model; - Environment EnvJoined = Environment::join(Env1, Env2, Model); + Environment EnvJoined = + Environment::join(Env1, Env2, Model, Environment::DiscardExprState); auto *JoinedVal = cast(EnvJoined.getValue(Loc)); EXPECT_NE(JoinedVal, &Val1); EXPECT_NE(JoinedVal, &Val2); diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp index 34f9b0b23719f..9d05a0d6ca401 100644 --- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp @@ -244,15 +244,17 @@ TEST_F(DiscardExprStateTest, WhileStatement) { EXPECT_NE(NotEqOpState.Env.getValue(NotEqOp), nullptr); // In the block that calls `foo(p)`, the value for `p != nullptr` is discarded - // because it is not consumed by this block. + // because it is not consumed outside the block it is in. const auto &CallFooState = blockStateForStmt(BlockStates, CallFoo); EXPECT_EQ(CallFooState.Env.getValue(NotEqOp), nullptr); } TEST_F(DiscardExprStateTest, BooleanOperator) { std::string Code = R"( - bool target(bool b1, bool b2) { - return b1 && b2; + void f(); + void target(bool b1, bool b2) { + if (b1 && b2) + f(); } )"; auto BlockStates = llvm::cantFail(runAnalysis( @@ -260,46 +262,80 @@ TEST_F(DiscardExprStateTest, BooleanOperator) { const auto &AndOp = matchNode(binaryOperator(hasOperatorName("&&"))); - const auto &Return = matchNode(returnStmt()); + const auto &CallF = + matchNode(callExpr(callee(functionDecl(hasName("f"))))); // In the block that evaluates the LHS of the `&&` operator, the LHS is // associated with a value, while the right-hand side is not (unsurprisingly, // as it hasn't been evaluated yet). const auto &LHSState = blockStateForStmt(BlockStates, *AndOp.getLHS()); auto *LHSValue = cast(LHSState.Env.getValue(*AndOp.getLHS())); - ASSERT_NE(LHSValue, nullptr); + EXPECT_NE(LHSValue, nullptr); EXPECT_EQ(LHSState.Env.getValue(*AndOp.getRHS()), nullptr); - // In the block that evaluates the RHS, the RHS is associated with a - // value. The value for the LHS has been discarded as it is not consumed by - // this block. + // In the block that evaluates the RHS, both the LHS and RHS are associated + // with values, as they are both subexpressions of the `&&` operator, which + // is evaluated in a later block. const auto &RHSState = blockStateForStmt(BlockStates, *AndOp.getRHS()); - EXPECT_EQ(RHSState.Env.getValue(*AndOp.getLHS()), nullptr); - auto *RHSValue = cast(RHSState.Env.getValue(*AndOp.getRHS())); - ASSERT_NE(RHSValue, nullptr); - - // In the block that evaluates the return statement, the expression `b1 && b2` - // is associated with a value (and check that it's the right one). - // The expressions `b1` and `b2` are _not_ associated with a value in this - // block, even though they are consumed by the block, because: - // * This block has two prececessor blocks (the one that evaluates `b1` and - // the one that evaluates `b2`). - // * `b1` is only associated with a value in the block that evaluates `b1` but - // not the block that evalutes `b2`, so the join operation discards the - // value for `b1`. - // * `b2` is only associated with a value in the block that evaluates `b2` but - // not the block that evaluates `b1`, the the join operation discards the - // value for `b2`. - // Nevertheless, the analysis generates the correct formula for `b1 && b2` - // because the transfer function for the `&&` operator retrieves the values - // for its operands from the environments for the blocks that compute the - // operands, rather than from the environment for the block that contains the - // `&&`. - const auto &ReturnState = blockStateForStmt(BlockStates, Return); - EXPECT_EQ(ReturnState.Env.getValue(*AndOp.getLHS()), nullptr); - EXPECT_EQ(ReturnState.Env.getValue(*AndOp.getRHS()), nullptr); - EXPECT_EQ(ReturnState.Env.getValue(AndOp), - &ReturnState.Env.makeAnd(*LHSValue, *RHSValue)); + EXPECT_EQ(RHSState.Env.getValue(*AndOp.getLHS()), LHSValue); + auto *RHSValue = RHSState.Env.get(*AndOp.getRHS()); + EXPECT_NE(RHSValue, nullptr); + + // In the block that evaluates `b1 && b2`, the `&&` as well as its operands + // are associated with values. + const auto &AndOpState = blockStateForStmt(BlockStates, AndOp); + EXPECT_EQ(AndOpState.Env.getValue(*AndOp.getLHS()), LHSValue); + EXPECT_EQ(AndOpState.Env.getValue(*AndOp.getRHS()), RHSValue); + EXPECT_EQ(AndOpState.Env.getValue(AndOp), + &AndOpState.Env.makeAnd(*LHSValue, *RHSValue)); + + // In the block that calls `f()`, none of `b1`, `b2`, or `b1 && b2` should be + // associated with values. + const auto &CallFState = blockStateForStmt(BlockStates, CallF); + EXPECT_EQ(CallFState.Env.getValue(*AndOp.getLHS()), nullptr); + EXPECT_EQ(CallFState.Env.getValue(*AndOp.getRHS()), nullptr); + EXPECT_EQ(CallFState.Env.getValue(AndOp), nullptr); +} + +TEST_F(DiscardExprStateTest, ConditionalOperator) { + std::string Code = R"( + void f(int*, int); + void g(); + bool cond(); + + void target() { + int i = 0; + if (cond()) + f(&i, cond() ? 1 : 0); + g(); + } + )"; + auto BlockStates = llvm::cantFail(runAnalysis( + Code, [](ASTContext &C) { return NoopAnalysis(C); })); + + const auto &AddrOfI = + matchNode(unaryOperator(hasOperatorName("&"))); + const auto &CallF = + matchNode(callExpr(callee(functionDecl(hasName("f"))))); + const auto &CallG = + matchNode(callExpr(callee(functionDecl(hasName("g"))))); + + // In the block that evaluates `&i`, it should obviously have a value. + const auto &AddrOfIState = blockStateForStmt(BlockStates, AddrOfI); + auto *AddrOfIVal = AddrOfIState.Env.get(AddrOfI); + EXPECT_NE(AddrOfIVal, nullptr); + + // Because of the conditional operator, the `f(...)` call is evaluated in a + // different block than `&i`, but `&i` still needs to have a value here + // because it's a subexpression of the call. + const auto &CallFState = blockStateForStmt(BlockStates, CallF); + EXPECT_NE(&CallFState, &AddrOfIState); + EXPECT_EQ(CallFState.Env.get(AddrOfI), AddrOfIVal); + + // In the block that calls `g()`, `&i` should no longer be associated with a + // value. + const auto &CallGState = blockStateForStmt(BlockStates, CallG); + EXPECT_EQ(CallGState.Env.get(AddrOfI), nullptr); } struct NonConvergingLattice {