diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h index 7f8c70d169376..62e7af7ac219b 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -723,9 +723,12 @@ RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE, RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME, const Environment &Env); -/// Returns the fields of `RD` that are initialized by an `InitListExpr`, in the -/// order in which they appear in `InitListExpr::inits()`. -std::vector getFieldsForInitListExpr(const RecordDecl *RD); +/// Returns the fields of a `RecordDecl` that are initialized by an +/// `InitListExpr`, in the order in which they appear in +/// `InitListExpr::inits()`. +/// `Init->getType()` must be a record type. +std::vector +getFieldsForInitListExpr(const InitListExpr *InitList); /// Associates a new `RecordValue` with `Loc` and returns the new value. RecordValue &refreshRecordValue(RecordStorageLocation &Loc, Environment &Env); diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp index d487944ce9211..fd7b06efcc786 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -361,8 +361,8 @@ getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields, if (const auto *FD = dyn_cast(VD)) Fields.insert(FD); } else if (auto *InitList = dyn_cast(&S)) { - if (RecordDecl *RD = InitList->getType()->getAsRecordDecl()) - for (const auto *FD : getFieldsForInitListExpr(RD)) + if (InitList->getType()->isRecordType()) + for (const auto *FD : getFieldsForInitListExpr(InitList)) Fields.insert(FD); } } @@ -983,7 +983,7 @@ StorageLocation &Environment::createObjectInternal(const ValueDecl *D, } Value *Val = nullptr; - if (InitExpr) + if (InitExpr) { // In the (few) cases where an expression is intentionally // "uninterpreted", `InitExpr` is not associated with a value. There are // two ways to handle this situation: propagate the status, so that @@ -998,6 +998,11 @@ StorageLocation &Environment::createObjectInternal(const ValueDecl *D, // default value (assuming we don't update the environment API to return // references). Val = getValue(*InitExpr); + + if (!Val && isa(InitExpr) && + InitExpr->getType()->isPointerType()) + Val = &getOrCreateNullPointerValue(InitExpr->getType()->getPointeeType()); + } if (!Val) Val = createValue(Ty); @@ -1104,12 +1109,22 @@ RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME, return Env.get(*Base); } -std::vector getFieldsForInitListExpr(const RecordDecl *RD) { +std::vector +getFieldsForInitListExpr(const InitListExpr *InitList) { + const RecordDecl *RD = InitList->getType()->getAsRecordDecl(); + assert(RD != nullptr); + + std::vector Fields; + + if (InitList->getType()->isUnionType()) { + Fields.push_back(InitList->getInitializedFieldInUnion()); + return Fields; + } + // Unnamed bitfields are only used for padding and do not appear in // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s // field list, and we thus need to remove them before mapping inits to // fields to avoid mapping inits to the wrongs fields. - std::vector Fields; llvm::copy_if( RD->fields(), std::back_inserter(Fields), [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); }); diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp index 089854264f483..04aa2831df055 100644 --- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp +++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp @@ -663,14 +663,7 @@ class TransferVisitor : public ConstStmtVisitor { void VisitInitListExpr(const InitListExpr *S) { QualType Type = S->getType(); - if (Type->isUnionType()) { - // FIXME: Initialize unions properly. - if (auto *Val = Env.createValue(Type)) - Env.setValue(*S, *Val); - return; - } - - if (!Type->isStructureOrClassType()) { + if (!Type->isRecordType()) { // Until array initialization is implemented, we skip arrays and don't // need to care about cases where `getNumInits() > 1`. if (!Type->isArrayType() && S->getNumInits() == 1) @@ -688,14 +681,26 @@ class TransferVisitor : public ConstStmtVisitor { llvm::DenseMap FieldLocs; // This only contains the direct fields for the given type. - std::vector FieldsForInit = - getFieldsForInitListExpr(Type->getAsRecordDecl()); + std::vector FieldsForInit = getFieldsForInitListExpr(S); - // `S->inits()` contains all the initializer epressions, including the + // `S->inits()` contains all the initializer expressions, including the // ones for direct base classes. - auto Inits = S->inits(); + ArrayRef Inits = S->inits(); size_t InitIdx = 0; + // Unions initialized with an empty initializer list need special treatment. + // For structs/classes initialized with an empty initializer list, Clang + // puts `ImplicitValueInitExpr`s in `InitListExpr::inits()`, but for unions, + // it doesn't do this -- so we create an `ImplicitValueInitExpr` ourselves. + std::optional ImplicitValueInitForUnion; + SmallVector InitsForUnion; + if (S->getType()->isUnionType() && Inits.empty()) { + assert(FieldsForInit.size() == 1); + ImplicitValueInitForUnion.emplace(FieldsForInit.front()->getType()); + InitsForUnion.push_back(&*ImplicitValueInitForUnion); + Inits = InitsForUnion; + } + // Initialize base classes. if (auto* R = S->getType()->getAsCXXRecordDecl()) { assert(FieldsForInit.size() + R->getNumBases() == Inits.size()); @@ -731,6 +736,17 @@ class TransferVisitor : public ConstStmtVisitor { FieldLocs.insert({Field, &Loc}); } + // In the case of a union, we don't in general have initializers for all + // of the fields. Create storage locations for the remaining fields (but + // don't associate them with values). + if (Type->isUnionType()) { + for (const FieldDecl *Field : + Env.getDataflowAnalysisContext().getModeledFields(Type)) { + if (auto [it, inserted] = FieldLocs.insert({Field, nullptr}); inserted) + it->second = &Env.createStorageLocation(Field->getType()); + } + } + // Check that we satisfy the invariant that a `RecordStorageLoation` // contains exactly the set of modeled fields for that type. // `ModeledFields` includes fields from all the bases, but only the diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h index 0d36d2802897f..b7cf6cc966edb 100644 --- a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h +++ b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h @@ -432,6 +432,8 @@ llvm::Error checkDataflowWithNoopAnalysis( {}); /// Returns the `ValueDecl` for the given identifier. +/// The returned pointer is guaranteed to be non-null; the function asserts if +/// no `ValueDecl` with the given name is found. /// /// Requirements: /// @@ -475,6 +477,15 @@ ValueT &getValueForDecl(ASTContext &ASTCtx, const Environment &Env, return *cast(Env.getValue(*VD)); } +/// Returns the storage location for the field called `Name` of `Loc`. +/// Optionally casts the field storage location to `T`. +template +std::enable_if_t, T &> +getFieldLoc(const RecordStorageLocation &Loc, llvm::StringRef Name, + ASTContext &ASTCtx) { + return *cast(Loc.getChild(*findValueDecl(ASTCtx, Name))); +} + /// Returns the value of a `Field` on the record referenced by `Loc.` /// Returns null if `Loc` is null. inline Value *getFieldValue(const RecordStorageLocation *Loc, @@ -487,6 +498,14 @@ inline Value *getFieldValue(const RecordStorageLocation *Loc, return Env.getValue(*FieldLoc); } +/// Returns the value of a `Field` on the record referenced by `Loc.` +/// Returns null if `Loc` is null. +inline Value *getFieldValue(const RecordStorageLocation *Loc, + llvm::StringRef Name, ASTContext &ASTCtx, + const Environment &Env) { + return getFieldValue(Loc, *findValueDecl(ASTCtx, Name), Env); +} + /// Creates and owns constraints which are boolean values. class ConstraintContext { unsigned NextAtom = 0; diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index 2be899f5b6da9..f534ccb125470 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -2392,14 +2392,92 @@ TEST(TransferTest, InitListExprAsUnion) { } F; public: - constexpr target() : F{nullptr} {} + constexpr target() : F{nullptr} { + int *null = nullptr; + F.b; // Make sure we reference 'b' so it is modeled. + // [[p]] + } }; )cc"; runDataflow( Code, [](const llvm::StringMap> &Results, ASTContext &ASTCtx) { - // Just verify that it doesn't crash. + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + auto &FLoc = getFieldLoc( + *Env.getThisPointeeStorageLocation(), "F", ASTCtx); + auto *AVal = cast(getFieldValue(&FLoc, "a", ASTCtx, Env)); + EXPECT_EQ(AVal, &getValueForDecl(ASTCtx, Env, "null")); + EXPECT_EQ(getFieldValue(&FLoc, "b", ASTCtx, Env), nullptr); + }); +} + +TEST(TransferTest, EmptyInitListExprForUnion) { + // This is a crash repro. + std::string Code = R"cc( + class target { + union { + int *a; + bool *b; + } F; + + public: + // Empty initializer list means that `F` is aggregate-initialized. + // For a union, this has the effect that the first member of the union + // is copy-initialized from an empty initializer list; in this specific + // case, this has the effect of initializing `a` with null. + constexpr target() : F{} { + int *null = nullptr; + F.b; // Make sure we reference 'b' so it is modeled. + // [[p]] + } + }; + )cc"; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + auto &FLoc = getFieldLoc( + *Env.getThisPointeeStorageLocation(), "F", ASTCtx); + auto *AVal = cast(getFieldValue(&FLoc, "a", ASTCtx, Env)); + EXPECT_EQ(AVal, &getValueForDecl(ASTCtx, Env, "null")); + EXPECT_EQ(getFieldValue(&FLoc, "b", ASTCtx, Env), nullptr); + }); +} + +TEST(TransferTest, EmptyInitListExprForStruct) { + std::string Code = R"cc( + class target { + struct { + int *a; + bool *b; + } F; + + public: + constexpr target() : F{} { + int *NullIntPtr = nullptr; + bool *NullBoolPtr = nullptr; + // [[p]] + } + }; + )cc"; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + auto &FLoc = getFieldLoc( + *Env.getThisPointeeStorageLocation(), "F", ASTCtx); + auto *AVal = cast(getFieldValue(&FLoc, "a", ASTCtx, Env)); + EXPECT_EQ(AVal, + &getValueForDecl(ASTCtx, Env, "NullIntPtr")); + auto *BVal = cast(getFieldValue(&FLoc, "b", ASTCtx, Env)); + EXPECT_EQ(BVal, + &getValueForDecl(ASTCtx, Env, "NullBoolPtr")); }); }