From 5ee040ba52ae5f1bbb4a579bae387eed192ea6eb Mon Sep 17 00:00:00 2001 From: Samira Bakon Date: Mon, 6 Oct 2025 10:53:05 -0400 Subject: [PATCH 1/3] [clang][dataflow] Copy only the fields present in the current derived type. Our modeling of cast expressions now results in the potential presence of fields from other types derived from the SourceLocation's type. Also add some additional debug logging and new tests, and update an existing test to reflect real usage of synthetic field callbacks. --- .../Analysis/FlowSensitive/StorageLocation.h | 11 +++ .../lib/Analysis/FlowSensitive/RecordOps.cpp | 42 +++++++++-- .../Analysis/FlowSensitive/RecordOpsTest.cpp | 4 +- .../Analysis/FlowSensitive/TransferTest.cpp | 74 +++++++++++++++++++ 4 files changed, 121 insertions(+), 10 deletions(-) diff --git a/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h b/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h index 534b9a017d8f0..5d9a0f702a299 100644 --- a/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h +++ b/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h @@ -144,6 +144,17 @@ class RecordStorageLocation final : public StorageLocation { /// The synthetic field must exist. StorageLocation &getSyntheticField(llvm::StringRef Name) const { StorageLocation *Loc = SyntheticFields.lookup(Name); + LLVM_DEBUG({ + if (Loc == nullptr) { + llvm::dbgs() << "Couldn't find synthetic field " << Name + << " on StorageLocation " << this << " of type " + << getType() << "\n"; + llvm::dbgs() << "Existing synthetic fields:\n"; + for ([[maybe_unused]] const auto &[Name, Loc] : SyntheticFields) { + llvm::dbgs() << Name << "\n"; + } + } + }); assert(Loc != nullptr); return *Loc; } diff --git a/clang/lib/Analysis/FlowSensitive/RecordOps.cpp b/clang/lib/Analysis/FlowSensitive/RecordOps.cpp index ed827ac2c7c90..03d6ed8020a0a 100644 --- a/clang/lib/Analysis/FlowSensitive/RecordOps.cpp +++ b/clang/lib/Analysis/FlowSensitive/RecordOps.cpp @@ -14,6 +14,9 @@ #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/Type.h" +#include "clang/Analysis/FlowSensitive/ASTOps.h" +#include "clang/Basic/LLVM.h" +#include "llvm/ADT/StringMap.h" #define DEBUG_TYPE "dataflow" @@ -79,18 +82,41 @@ void copyRecord(RecordStorageLocation &Src, RecordStorageLocation &Dst, if (SrcType == DstType || (SrcDecl != nullptr && DstDecl != nullptr && SrcDecl->isDerivedFrom(DstDecl))) { + // Dst may have children modeled from other derived types than SrcType, e.g. + // after casts of Dst to other types derived from DstType. Only copy the + // children and synthetic fields present in both Dst and SrcType. + const FieldSet FieldsInSrcType = + Env.getDataflowAnalysisContext().getModeledFields(SrcType); for (auto [Field, DstFieldLoc] : Dst.children()) - copyField(*Field, Src.getChild(*Field), DstFieldLoc, Dst, Env); + if (const auto *FieldAsFieldDecl = dyn_cast(Field); + FieldAsFieldDecl && FieldsInSrcType.contains(FieldAsFieldDecl)) + copyField(*Field, Src.getChild(*Field), DstFieldLoc, Dst, Env); + const llvm::StringMap SyntheticFieldsForSrcType = + Env.getDataflowAnalysisContext().getSyntheticFields(SrcType); for (const auto &[Name, DstFieldLoc] : Dst.synthetic_fields()) - copySyntheticField(DstFieldLoc->getType(), Src.getSyntheticField(Name), - *DstFieldLoc, Env); + if (SyntheticFieldsForSrcType.contains(Name)) + copySyntheticField(DstFieldLoc->getType(), Src.getSyntheticField(Name), + *DstFieldLoc, Env); } else if (SrcDecl != nullptr && DstDecl != nullptr && DstDecl->isDerivedFrom(SrcDecl)) { - for (auto [Field, SrcFieldLoc] : Src.children()) - copyField(*Field, SrcFieldLoc, Dst.getChild(*Field), Dst, Env); - for (const auto &[Name, SrcFieldLoc] : Src.synthetic_fields()) - copySyntheticField(SrcFieldLoc->getType(), *SrcFieldLoc, - Dst.getSyntheticField(Name), Env); + // Src may have children modeled from other derived types than DstType, e.g. + // after other casts of Src to those types (likely in different branches, + // but without flow-condition-dependent field modeling). Only copy the + // children and synthetic fields of Src that are present in DstType. + const FieldSet FieldsInDstType = + Env.getDataflowAnalysisContext().getModeledFields(DstType); + for (auto [Field, SrcFieldLoc] : Src.children()) { + if (const auto *FieldAsFieldDecl = dyn_cast(Field); + FieldAsFieldDecl && FieldsInDstType.contains(FieldAsFieldDecl)) + copyField(*Field, SrcFieldLoc, Dst.getChild(*Field), Dst, Env); + } + const llvm::StringMap SyntheticFieldsForDstType = + Env.getDataflowAnalysisContext().getSyntheticFields(DstType); + for (const auto &[Name, SrcFieldLoc] : Src.synthetic_fields()) { + if (SyntheticFieldsForDstType.contains(Name)) + copySyntheticField(SrcFieldLoc->getType(), *SrcFieldLoc, + Dst.getSyntheticField(Name), Env); + } } else { for (const FieldDecl *Field : Env.getDataflowAnalysisContext().getModeledFields(TypeToCopy)) { diff --git a/clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp b/clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp index 57162cd2efcc4..b29bd2e9560af 100644 --- a/clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp @@ -213,9 +213,9 @@ TEST(RecordOpsTest, CopyRecordBetweenDerivedAndBase) { )"; auto SyntheticFieldCallback = [](QualType Ty) -> llvm::StringMap { CXXRecordDecl *ADecl = nullptr; - if (Ty.getAsString() == "A") + if (Ty.getAsString() == "A" || Ty.getAsString() == "struct A") ADecl = Ty->getAsCXXRecordDecl(); - else if (Ty.getAsString() == "B") + else if (Ty.getAsString() == "B" || Ty.getAsString() == "struct B") ADecl = Ty->getAsCXXRecordDecl() ->bases_begin() ->getType() diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index cbd55966a3d88..de9008e138155 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -3709,6 +3709,80 @@ TEST(TransferTest, StaticCastBaseToDerived) { }); } +TEST(TransferTest, MultipleConstructionsFromStaticCastsBaseToDerived) { + std::string Code = R"cc( + struct Base {}; + +struct DerivedOne : public Base { + // Need a field in one of the derived siblings that the other doesn't have. + int I; +}; + +struct DerivedTwo : public Base {}; + +int getInt(); + +void target(Base* B) { + // Need something to cause modeling of I. + DerivedOne D1; + (void)D1.I; + + // Switch cases are a reasonable pattern where the same variable might be + // safely cast to two different derived types within the same function + // without resetting the value of the variable. getInt is a stand-in for what + // is usually a function indicating the dynamic derived type. + switch (getInt()) { + case 1: + // Need a CXXConstructExpr or copy/move CXXOperatorCallExpr from each of + // the casts to derived types, cast from the same base variable, to + // trigger the copyRecord behavior. + (void)new DerivedOne(*static_cast(B)); + break; + case 2: + (void)new DerivedTwo(*static_cast(B)); + break; + }; +} +)cc"; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + // This is a crash repro. We used to crash when transferring the + // construction of DerivedTwo because B's StorageLocation had a child + // for the field I, but DerivedTwo doesn't. Now, we should only copy the + // fields from B that are present in DerivedTwo. + }); +} + +TEST(TransferTest, CopyConstructionOfBaseAfterStaticCastsBaseToDerived) { + std::string Code = R"cc( + struct Base {}; + +struct Derived : public Base { +// Need a field in Derived that is not in Base. + char C; +}; + +void target(Base* B, Base* OtherB) { + Derived* D = static_cast(B); + *B = *OtherB; + // Need something to cause modeling of C. + (void)D->C; +} + +)cc"; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + // This is a crash repro. We used to crash when transferring the + // copy construction of B from OtherB because B's StorageLocation had a + // child for the field C, but Base doesn't. Now, we should only copy the + // fields from B that are present in Base. + }); +} + TEST(TransferTest, ExplicitDerivedToBaseCast) { std::string Code = R"cc( struct Base {}; From 3bcdc0da6110c884cc29c21a555cc0866c9689d9 Mon Sep 17 00:00:00 2001 From: Samira Bakon Date: Mon, 6 Oct 2025 11:48:42 -0400 Subject: [PATCH 2/3] Add more flexible string comparisons in tests' synthetic field callbacks. --- .../Analysis/FlowSensitive/RecordOpsTest.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp b/clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp index b29bd2e9560af..73390a829464a 100644 --- a/clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp @@ -64,7 +64,8 @@ TEST(RecordOpsTest, CopyRecord) { runDataflow( Code, [](QualType Ty) -> llvm::StringMap { - if (Ty.getAsString() != "S") + std::string TypeAsString = Ty.getAsString(); + if (TypeAsString != "S" && TypeAsString != "struct S") return {}; QualType IntTy = getFieldNamed(Ty->getAsRecordDecl(), "outer_int")->getType(); @@ -123,7 +124,8 @@ TEST(RecordOpsTest, RecordsEqual) { runDataflow( Code, [](QualType Ty) -> llvm::StringMap { - if (Ty.getAsString() != "S") + std::string TypeAsString = Ty.getAsString(); + if (TypeAsString != "S" && TypeAsString != "struct S") return {}; QualType IntTy = getFieldNamed(Ty->getAsRecordDecl(), "outer_int")->getType(); @@ -213,9 +215,10 @@ TEST(RecordOpsTest, CopyRecordBetweenDerivedAndBase) { )"; auto SyntheticFieldCallback = [](QualType Ty) -> llvm::StringMap { CXXRecordDecl *ADecl = nullptr; - if (Ty.getAsString() == "A" || Ty.getAsString() == "struct A") + std::string TypeAsString = Ty.getAsString(); + if (TypeAsString == "A" || TypeAsString == "struct A") ADecl = Ty->getAsCXXRecordDecl(); - else if (Ty.getAsString() == "B" || Ty.getAsString() == "struct B") + else if (TypeAsString == "B" || TypeAsString == "struct B") ADecl = Ty->getAsCXXRecordDecl() ->bases_begin() ->getType() From 1f3f547e887ddeca5f3c8f75e7205da82dad50c1 Mon Sep 17 00:00:00 2001 From: Samira Bakon Date: Mon, 6 Oct 2025 13:41:23 -0400 Subject: [PATCH 3/3] Fix and expand comment in test. --- clang/unittests/Analysis/FlowSensitive/TransferTest.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index de9008e138155..66b3bba594fc9 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -3778,8 +3778,11 @@ void target(Base* B, Base* OtherB) { ASTContext &ASTCtx) { // This is a crash repro. We used to crash when transferring the // copy construction of B from OtherB because B's StorageLocation had a - // child for the field C, but Base doesn't. Now, we should only copy the - // fields from B that are present in Base. + // child for the field C, but Base doesn't (so OtherB doesn't, since + // it's never been cast to any other type), and we tried to copy from + // the source (OtherB) all the fields present in the destination (B). + // Now, we should only try to copy the fields from OtherB that are + // present in Base. }); } @@ -5394,7 +5397,7 @@ TEST(TransferTest, UnsupportedValueEquality) { A, B }; - + void target() { EC ec = EC::A;