-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[clang][dataflow] Copy only the fields present in the current derived… #162100
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
… type. Our modeling of cast expressions now results in the potential presence of fields from other types derived from the SourceLocation's type. Also add some additional debug logging and new tests, and update an existing test to reflect real usage of synthetic field callbacks.
@llvm/pr-subscribers-clang-analysis @llvm/pr-subscribers-clang Author: Samira Bakon (bazuzi) Changes… type. Our modeling of cast expressions now results in the potential presence of fields from other types derived from the SourceLocation's type. Also add some additional debug logging and new tests, and update an existing test to reflect real usage of synthetic field callbacks. Full diff: https://github.com/llvm/llvm-project/pull/162100.diff 4 Files Affected:
diff --git a/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h b/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h
index 534b9a017d8f0..5d9a0f702a299 100644
--- a/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h
+++ b/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h
@@ -144,6 +144,17 @@ class RecordStorageLocation final : public StorageLocation {
/// The synthetic field must exist.
StorageLocation &getSyntheticField(llvm::StringRef Name) const {
StorageLocation *Loc = SyntheticFields.lookup(Name);
+ LLVM_DEBUG({
+ if (Loc == nullptr) {
+ llvm::dbgs() << "Couldn't find synthetic field " << Name
+ << " on StorageLocation " << this << " of type "
+ << getType() << "\n";
+ llvm::dbgs() << "Existing synthetic fields:\n";
+ for ([[maybe_unused]] const auto &[Name, Loc] : SyntheticFields) {
+ llvm::dbgs() << Name << "\n";
+ }
+ }
+ });
assert(Loc != nullptr);
return *Loc;
}
diff --git a/clang/lib/Analysis/FlowSensitive/RecordOps.cpp b/clang/lib/Analysis/FlowSensitive/RecordOps.cpp
index ed827ac2c7c90..03d6ed8020a0a 100644
--- a/clang/lib/Analysis/FlowSensitive/RecordOps.cpp
+++ b/clang/lib/Analysis/FlowSensitive/RecordOps.cpp
@@ -14,6 +14,9 @@
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/Type.h"
+#include "clang/Analysis/FlowSensitive/ASTOps.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/StringMap.h"
#define DEBUG_TYPE "dataflow"
@@ -79,18 +82,41 @@ void copyRecord(RecordStorageLocation &Src, RecordStorageLocation &Dst,
if (SrcType == DstType || (SrcDecl != nullptr && DstDecl != nullptr &&
SrcDecl->isDerivedFrom(DstDecl))) {
+ // Dst may have children modeled from other derived types than SrcType, e.g.
+ // after casts of Dst to other types derived from DstType. Only copy the
+ // children and synthetic fields present in both Dst and SrcType.
+ const FieldSet FieldsInSrcType =
+ Env.getDataflowAnalysisContext().getModeledFields(SrcType);
for (auto [Field, DstFieldLoc] : Dst.children())
- copyField(*Field, Src.getChild(*Field), DstFieldLoc, Dst, Env);
+ if (const auto *FieldAsFieldDecl = dyn_cast<FieldDecl>(Field);
+ FieldAsFieldDecl && FieldsInSrcType.contains(FieldAsFieldDecl))
+ copyField(*Field, Src.getChild(*Field), DstFieldLoc, Dst, Env);
+ const llvm::StringMap<QualType> SyntheticFieldsForSrcType =
+ Env.getDataflowAnalysisContext().getSyntheticFields(SrcType);
for (const auto &[Name, DstFieldLoc] : Dst.synthetic_fields())
- copySyntheticField(DstFieldLoc->getType(), Src.getSyntheticField(Name),
- *DstFieldLoc, Env);
+ if (SyntheticFieldsForSrcType.contains(Name))
+ copySyntheticField(DstFieldLoc->getType(), Src.getSyntheticField(Name),
+ *DstFieldLoc, Env);
} else if (SrcDecl != nullptr && DstDecl != nullptr &&
DstDecl->isDerivedFrom(SrcDecl)) {
- for (auto [Field, SrcFieldLoc] : Src.children())
- copyField(*Field, SrcFieldLoc, Dst.getChild(*Field), Dst, Env);
- for (const auto &[Name, SrcFieldLoc] : Src.synthetic_fields())
- copySyntheticField(SrcFieldLoc->getType(), *SrcFieldLoc,
- Dst.getSyntheticField(Name), Env);
+ // Src may have children modeled from other derived types than DstType, e.g.
+ // after other casts of Src to those types (likely in different branches,
+ // but without flow-condition-dependent field modeling). Only copy the
+ // children and synthetic fields of Src that are present in DstType.
+ const FieldSet FieldsInDstType =
+ Env.getDataflowAnalysisContext().getModeledFields(DstType);
+ for (auto [Field, SrcFieldLoc] : Src.children()) {
+ if (const auto *FieldAsFieldDecl = dyn_cast<FieldDecl>(Field);
+ FieldAsFieldDecl && FieldsInDstType.contains(FieldAsFieldDecl))
+ copyField(*Field, SrcFieldLoc, Dst.getChild(*Field), Dst, Env);
+ }
+ const llvm::StringMap<QualType> SyntheticFieldsForDstType =
+ Env.getDataflowAnalysisContext().getSyntheticFields(DstType);
+ for (const auto &[Name, SrcFieldLoc] : Src.synthetic_fields()) {
+ if (SyntheticFieldsForDstType.contains(Name))
+ copySyntheticField(SrcFieldLoc->getType(), *SrcFieldLoc,
+ Dst.getSyntheticField(Name), Env);
+ }
} else {
for (const FieldDecl *Field :
Env.getDataflowAnalysisContext().getModeledFields(TypeToCopy)) {
diff --git a/clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp b/clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp
index 57162cd2efcc4..b29bd2e9560af 100644
--- a/clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/RecordOpsTest.cpp
@@ -213,9 +213,9 @@ TEST(RecordOpsTest, CopyRecordBetweenDerivedAndBase) {
)";
auto SyntheticFieldCallback = [](QualType Ty) -> llvm::StringMap<QualType> {
CXXRecordDecl *ADecl = nullptr;
- if (Ty.getAsString() == "A")
+ if (Ty.getAsString() == "A" || Ty.getAsString() == "struct A")
ADecl = Ty->getAsCXXRecordDecl();
- else if (Ty.getAsString() == "B")
+ else if (Ty.getAsString() == "B" || Ty.getAsString() == "struct B")
ADecl = Ty->getAsCXXRecordDecl()
->bases_begin()
->getType()
diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
index cbd55966a3d88..de9008e138155 100644
--- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -3709,6 +3709,80 @@ TEST(TransferTest, StaticCastBaseToDerived) {
});
}
+TEST(TransferTest, MultipleConstructionsFromStaticCastsBaseToDerived) {
+ std::string Code = R"cc(
+ struct Base {};
+
+struct DerivedOne : public Base {
+ // Need a field in one of the derived siblings that the other doesn't have.
+ int I;
+};
+
+struct DerivedTwo : public Base {};
+
+int getInt();
+
+void target(Base* B) {
+ // Need something to cause modeling of I.
+ DerivedOne D1;
+ (void)D1.I;
+
+ // Switch cases are a reasonable pattern where the same variable might be
+ // safely cast to two different derived types within the same function
+ // without resetting the value of the variable. getInt is a stand-in for what
+ // is usually a function indicating the dynamic derived type.
+ switch (getInt()) {
+ case 1:
+ // Need a CXXConstructExpr or copy/move CXXOperatorCallExpr from each of
+ // the casts to derived types, cast from the same base variable, to
+ // trigger the copyRecord behavior.
+ (void)new DerivedOne(*static_cast<DerivedOne*>(B));
+ break;
+ case 2:
+ (void)new DerivedTwo(*static_cast<DerivedTwo*>(B));
+ break;
+ };
+}
+)cc";
+ runDataflow(
+ Code,
+ [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+ ASTContext &ASTCtx) {
+ // This is a crash repro. We used to crash when transferring the
+ // construction of DerivedTwo because B's StorageLocation had a child
+ // for the field I, but DerivedTwo doesn't. Now, we should only copy the
+ // fields from B that are present in DerivedTwo.
+ });
+}
+
+TEST(TransferTest, CopyConstructionOfBaseAfterStaticCastsBaseToDerived) {
+ std::string Code = R"cc(
+ struct Base {};
+
+struct Derived : public Base {
+// Need a field in Derived that is not in Base.
+ char C;
+};
+
+void target(Base* B, Base* OtherB) {
+ Derived* D = static_cast<Derived*>(B);
+ *B = *OtherB;
+ // Need something to cause modeling of C.
+ (void)D->C;
+}
+
+)cc";
+ runDataflow(
+ Code,
+ [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+ ASTContext &ASTCtx) {
+ // This is a crash repro. We used to crash when transferring the
+ // copy construction of B from OtherB because B's StorageLocation had a
+ // child for the field C, but Base doesn't. Now, we should only copy the
+ // fields from B that are present in Base.
+ });
+}
+
TEST(TransferTest, ExplicitDerivedToBaseCast) {
std::string Code = R"cc(
struct Base {};
|
Code, | ||
[](QualType Ty) -> llvm::StringMap<QualType> { | ||
if (Ty.getAsString() != "S") | ||
std::string TypeAsString = Ty.getAsString(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
out of curiosity, what alerted you to these changes?
does it help to canonicalize Ty first ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The new calls in copyRecord
to getSyntheticFields
were passing in the type such that it was stringified with the "struct", which caused these existing callbacks to return empty maps.
I don't think the API exposing the extension point for synthetic fields sets any sort of limits or expectations for whether the type passed to the callback is canonicalized, so I was hesitant to change the implementation to meet the expectations of a few brittle tests.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah I see, ok!
// This is a crash repro. We used to crash when transferring the | ||
// copy construction of B from OtherB because B's StorageLocation had a | ||
// child for the field C, but Base doesn't. Now, we should only copy the | ||
// fields from B that are present in Base. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"from B" -> "to B" ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
changed to from OtherB, since the crash was when accessing fields in OtherB's StorageLocation.
ASTContext &ASTCtx) { | ||
// This is a crash repro. We used to crash when transferring the | ||
// copy construction of B from OtherB because B's StorageLocation had a | ||
// child for the field C, but Base doesn't. Now, we should only copy the |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
but Base (and OtherB) doesn't ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
correct, expanded on that
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks!
Code, | ||
[](QualType Ty) -> llvm::StringMap<QualType> { | ||
if (Ty.getAsString() != "S") | ||
std::string TypeAsString = Ty.getAsString(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah I see, ok!
… type.
Our modeling of cast expressions now results in the potential presence of fields from other types derived from the SourceLocation's type.
Also add some additional debug logging and new tests, and update an existing test to reflect real usage of synthetic field callbacks.