diff --git a/llvm/include/llvm/CAS/OnDiskGraphDB.h b/llvm/include/llvm/CAS/OnDiskGraphDB.h
index 2fa8ecf4d81e2..162aa8cad0fe2 100644
--- a/llvm/include/llvm/CAS/OnDiskGraphDB.h
+++ b/llvm/include/llvm/CAS/OnDiskGraphDB.h
@@ -279,7 +279,8 @@ class OnDiskGraphDB {
   ///
   /// Returns \p nullopt if the object is not stored in this CAS.
+  /// If \p CheckUpstream is false, the upstream lookup is skipped.
   LLVM_ABI_FOR_TEST std::optional<ObjectID>
-  getExistingReference(ArrayRef<uint8_t> Digest);
+  getExistingReference(ArrayRef<uint8_t> Digest, bool CheckUpstream = true);
 
   /// Check whether the object associated with \p Ref is stored in the CAS.
   /// Note that this function will fault-in according to the policy.
@@ -287,8 +288,23 @@ class OnDiskGraphDB {
 
   /// Check whether the object associated with \p Ref is stored in the CAS.
   /// Note that this function does not fault-in.
-  bool containsObject(ObjectID Ref) const {
-    return containsObject(Ref, /*CheckUpstream=*/true);
+  /// \p CheckUpstream is forwarded to getObjectPresence(). An error from that
+  /// lookup is consumed and reported as \c false.
+  bool containsObject(ObjectID Ref, bool CheckUpstream = true) const {
+    auto Presence = getObjectPresence(Ref, CheckUpstream);
+    if (!Presence) {
+      consumeError(Presence.takeError());
+      return false;
+    }
+    switch (*Presence) {
+    case ObjectPresence::Missing:
+      return false;
+    case ObjectPresence::InPrimaryDB:
+      return true;
+    case ObjectPresence::OnlyInUpstreamDB:
+      return true;
+    }
+    llvm_unreachable("Unknown ObjectPresence enum");
   }
 
   /// \returns the data part of the provided object handle.
@@ -370,24 +386,6 @@ class OnDiskGraphDB {
   LLVM_ABI_FOR_TEST Expected<ObjectPresence>
   getObjectPresence(ObjectID Ref, bool CheckUpstream) const;
 
-  /// \returns true if object can be found in database.
-  bool containsObject(ObjectID Ref, bool CheckUpstream) const {
-    auto Presence = getObjectPresence(Ref, CheckUpstream);
-    if (!Presence) {
-      consumeError(Presence.takeError());
-      return false;
-    }
-    switch (*Presence) {
-    case ObjectPresence::Missing:
-      return false;
-    case ObjectPresence::InPrimaryDB:
-      return true;
-    case ObjectPresence::OnlyInUpstreamDB:
-      return true;
-    }
-    llvm_unreachable("Unknown ObjectPresence enum");
-  }
-
   /// When \p load is called for a node that doesn't exist, this function tries
   /// to load it from the upstream store and copy it to the primary one.
   Expected<std::optional<ObjectHandle>> faultInFromUpstream(ObjectID PrimaryID);
diff --git a/llvm/lib/CAS/ActionCaches.cpp b/llvm/lib/CAS/ActionCaches.cpp
index 003c850275ff4..9ed0938009689 100644
--- a/llvm/lib/CAS/ActionCaches.cpp
+++ b/llvm/lib/CAS/ActionCaches.cpp
@@ -235,17 +235,32 @@ Error UnifiedOnDiskActionCache::putImpl(ArrayRef<uint8_t> Key,
 }
 
 Error UnifiedOnDiskActionCache::validate() const {
-  auto ValidateRef = [](FileOffset Offset, ArrayRef<char> Value) -> Error {
+  auto ValidateRef = [this](FileOffset Offset, ArrayRef<char> Value) -> Error {
     auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(Value);
     auto formatError = [&](Twine Msg) {
       return createStringError(
           llvm::errc::illegal_byte_sequence,
           "bad record at 0x" +
-              utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " +
-              Msg.str());
+              utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) +
+              " ref=0x" + utohexstr(ID.getOpaqueData(), /*LowerCase=*/true) +
+              ": " + Msg.str());
     };
     if (ID.getOpaqueData() == 0)
       return formatError("zero is not a valid ref");
+    // Check containsObject first, because other API assumes a valid ObjectID.
+    if (!UniDB->getGraphDB().containsObject(ID, /*CheckUpstream=*/false))
+      return formatError("ref is not in cas index");
+    auto Hash = UniDB->getGraphDB().getDigest(ID);
+    auto Ref =
+        UniDB->getGraphDB().getExistingReference(Hash, /*CheckUpstream=*/false);
+    // No assert on Ref: a corrupt index must surface as a validation error,
+    // not as an abort in asserts-enabled builds.
+    if (!Ref)
+      return formatError("ref is not in cas index after contains");
+    if (*Ref != ID)
+      return formatError("ref does not match indexed offset " +
+                         utohexstr(Ref->getOpaqueData(), /*LowerCase=*/true) +
+                         " for hash " + toHex(Hash));
     return Error::success();
   };
   return UniDB->getKeyValueDB().validate(ValidateRef);
diff --git a/llvm/lib/CAS/OnDiskGraphDB.cpp b/llvm/lib/CAS/OnDiskGraphDB.cpp
index 84f27c4938050..01aabf279b970 100644
--- a/llvm/lib/CAS/OnDiskGraphDB.cpp
+++ b/llvm/lib/CAS/OnDiskGraphDB.cpp
@@ -1109,10 +1109,11 @@ ObjectID OnDiskGraphDB::getExternalReference(const IndexProxy &I) {
 }
 
 std::optional<ObjectID>
-OnDiskGraphDB::getExistingReference(ArrayRef<uint8_t> Digest) {
+OnDiskGraphDB::getExistingReference(ArrayRef<uint8_t> Digest,
+                                    bool CheckUpstream) {
   auto tryUpstream =
       [&](std::optional<IndexProxy> I) -> std::optional<ObjectID> {
-    if (!UpstreamDB)
+    if (!CheckUpstream || !UpstreamDB)
       return std::nullopt;
     std::optional<ObjectID> UpstreamID =
         UpstreamDB->getExistingReference(Digest);
diff --git a/llvm/test/tools/llvm-cas/validation.test b/llvm/test/tools/llvm-cas/validation.test
index 13f24f0873463..db7047d7e3904 100644
--- a/llvm/test/tools/llvm-cas/validation.test
+++ b/llvm/test/tools/llvm-cas/validation.test
@@ -25,6 +25,14 @@ RUN: --data - >%t/abc.casid
 
 RUN: llvm-cas --cas %t/ac --put-cache-key @%t/abc.casid @%t/empty.casid
 RUN: llvm-cas --cas %t/ac --validate
+
+# Check that validation fails if the objects referenced are missing.
+RUN: mv %t/ac/v1.1/index.v1 %t/tmp.index.v1
+RUN: not llvm-cas --cas %t/ac --validate
+
+RUN: mv %t/tmp.index.v1 %t/ac/v1.1/index.v1
+RUN: llvm-cas --cas %t/ac --validate
+
 # Note: records are 40 bytes (32 hash bytes + 8 byte value), so trim the last
 # allocated record, leaving it invalid.
 RUN: truncate -s -40 %t/ac/v1.1/actions.v1