diff --git a/llvm/include/llvm/CAS/OnDiskDataAllocator.h b/llvm/include/llvm/CAS/OnDiskDataAllocator.h index 669e84aba33d7..c65cfebe67862 100644 --- a/llvm/include/llvm/CAS/OnDiskDataAllocator.h +++ b/llvm/include/llvm/CAS/OnDiskDataAllocator.h @@ -33,7 +33,7 @@ class OnDiskDataAllocator { /// An iterator-like return value for data insertion. Maybe it should be /// called \c iterator, but it has no increment. - class pointer { + class OnDiskPtr { public: FileOffset getOffset() const { return Offset; } explicit operator bool() const { return bool(getOffset()); } @@ -46,21 +46,21 @@ class OnDiskDataAllocator { return &Value; } - pointer() = default; + OnDiskPtr() = default; private: friend class OnDiskDataAllocator; - pointer(FileOffset Offset, ValueProxy Value) + OnDiskPtr(FileOffset Offset, ValueProxy Value) : Offset(Offset), Value(Value) {} FileOffset Offset; ValueProxy Value; }; /// Look up the data stored at the given offset. - const char *beginData(FileOffset Offset) const; + Expected> get(FileOffset Offset, size_t Size) const; /// Allocate at least \p Size with 8-byte alignment. - Expected allocate(size_t Size); + Expected allocate(size_t Size); /// \returns the buffer that was allocated at \p create time, with size /// \p UserHeaderSize. diff --git a/llvm/include/llvm/CAS/OnDiskGraphDB.h b/llvm/include/llvm/CAS/OnDiskGraphDB.h index ac19dc956baca..08e28d17668cf 100644 --- a/llvm/include/llvm/CAS/OnDiskGraphDB.h +++ b/llvm/include/llvm/CAS/OnDiskGraphDB.h @@ -410,7 +410,7 @@ class OnDiskGraphDB { static InternalRef makeInternalRef(FileOffset IndexOffset); IndexProxy - getIndexProxyFromPointer(OnDiskTrieRawHashMap::const_pointer P) const; + getIndexProxyFromPointer(OnDiskTrieRawHashMap::ConstOnDiskPtr P) const; InternalRefArrayRef getInternalRefs(ObjectHandle Node) const; diff --git a/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h b/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h index fbbbe22e8e538..c7f552db8a176 100644 --- a/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h +++ b/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h @@ -136,38 +136,38 @@ class OnDiskTrieRawHashMap { bool IsValue = false; }; - class pointer; - class const_pointer : public PointerImpl { + class OnDiskPtr; + class ConstOnDiskPtr : public PointerImpl { public: - const_pointer() = default; + ConstOnDiskPtr() = default; private: - friend class pointer; + friend class OnDiskPtr; friend class OnDiskTrieRawHashMap; - using const_pointer::PointerImpl::PointerImpl; + using ConstOnDiskPtr::PointerImpl::PointerImpl; }; - class pointer : public PointerImpl { + class OnDiskPtr : public PointerImpl { public: - operator const_pointer() const { - return const_pointer(Value, getOffset(), IsValue); + operator ConstOnDiskPtr() const { + return ConstOnDiskPtr(Value, getOffset(), IsValue); } - pointer() = default; + OnDiskPtr() = default; private: friend class OnDiskTrieRawHashMap; - using pointer::PointerImpl::PointerImpl; + using OnDiskPtr::PointerImpl::PointerImpl; }; /// Find the value from hash. /// /// \returns pointer to the value if exists, otherwise returns a non-value /// pointer that evaluates to `false` when convert to boolean. - const_pointer find(ArrayRef Hash) const; + ConstOnDiskPtr find(ArrayRef Hash) const; /// Helper function to recover a pointer into the trie from file offset. - Expected recoverFromFileOffset(FileOffset Offset) const; + Expected recoverFromFileOffset(FileOffset Offset) const; using LazyInsertOnConstructCB = function_ref; @@ -189,11 +189,11 @@ class OnDiskTrieRawHashMap { /// The in-memory \a TrieRawHashMap uses LazyAtomicPointer to synchronize /// simultaneous writes, but that seems dangerous to use in a memory-mapped /// file in case a process crashes in the busy state. - Expected insertLazy(ArrayRef Hash, - LazyInsertOnConstructCB OnConstruct = nullptr, - LazyInsertOnLeakCB OnLeak = nullptr); + Expected insertLazy(ArrayRef Hash, + LazyInsertOnConstructCB OnConstruct = nullptr, + LazyInsertOnLeakCB OnLeak = nullptr); - Expected insert(const ConstValueProxy &Value) { + Expected insert(const ConstValueProxy &Value) { return insertLazy(Value.Hash, [&](FileOffset, ValueProxy Allocated) { assert(Allocated.Hash == Value.Hash); assert(Allocated.Data.size() == Value.Data.size()); diff --git a/llvm/lib/CAS/OnDiskDataAllocator.cpp b/llvm/lib/CAS/OnDiskDataAllocator.cpp index 2ab1290a84628..22e1043afbd98 100644 --- a/llvm/lib/CAS/OnDiskDataAllocator.cpp +++ b/llvm/lib/CAS/OnDiskDataAllocator.cpp @@ -19,11 +19,6 @@ using namespace llvm; using namespace llvm::cas; using namespace llvm::cas::ondisk; -OnDiskDataAllocator::OnDiskDataAllocator(OnDiskDataAllocator &&RHS) = default; -OnDiskDataAllocator & -OnDiskDataAllocator::operator=(OnDiskDataAllocator &&RHS) = default; -OnDiskDataAllocator::~OnDiskDataAllocator() = default; - #if LLVM_ENABLE_ONDISK_CAS //===----------------------------------------------------------------------===// @@ -109,12 +104,16 @@ DataAllocatorHandle::create(MappedFileRegionArena &Alloc, StringRef Name, // Construct the header and the name. assert(Name.size() <= UINT16_MAX && "Expected smaller table name"); auto *H = new (Alloc.getRegion().data() + *Offset) - Header{{TableHandle::TableKind::DataAllocator, (uint16_t)Name.size(), - (int32_t)(sizeof(Header) + UserHeaderSize)}, + Header{{TableHandle::TableKind::DataAllocator, + static_cast(Name.size()), + static_cast(sizeof(Header) + UserHeaderSize)}, /*AllocatorOffset=*/{0}, /*UserHeaderSize=*/UserHeaderSize}; - memset(H + 1, 0, UserHeaderSize); - char *NameStorage = reinterpret_cast(H + 1) + UserHeaderSize; + // Memset UserHeader. + char *UserHeader = reinterpret_cast(H + 1); + memset(UserHeader, 0, UserHeaderSize); + // Write database file name (null-terminated). + char *NameStorage = UserHeader + UserHeaderSize; llvm::copy(Name, NameStorage); NameStorage[Name.size()] = 0; return DataAllocatorHandle(Alloc.getRegion(), *H); @@ -168,21 +167,24 @@ Expected OnDiskDataAllocator::create( return OnDiskDataAllocator(std::make_unique(std::move(Impl))); } -Expected +Expected OnDiskDataAllocator::allocate(size_t Size) { auto Data = Impl->Store.allocate(Impl->File.getAlloc(), Size); if (LLVM_UNLIKELY(!Data)) return Data.takeError(); - return pointer(FileOffset(Data->data() - Impl->Store.getRegion().data()), - *Data); + return OnDiskPtr(FileOffset(Data->data() - Impl->Store.getRegion().data()), + *Data); } -const char *OnDiskDataAllocator::beginData(FileOffset Offset) const { +Expected> OnDiskDataAllocator::get(FileOffset Offset, + size_t Size) const { assert(Offset); assert(Impl); - assert(Offset.get() < Impl->File.getAlloc().size()); - return Impl->File.getRegion().data() + Offset.get(); + if (Offset.get() + Size >= Impl->File.getAlloc().size()) + return createStringError(make_error_code(std::errc::protocol_error), + "requested size too large in allocator"); + return ArrayRef{Impl->File.getRegion().data() + Offset.get(), Size}; } MutableArrayRef OnDiskDataAllocator::getUserHeader() { @@ -204,7 +206,6 @@ struct OnDiskDataAllocator::ImplType {}; Expected OnDiskDataAllocator::create( const Twine &Path, const Twine &TableName, uint64_t MaxFileSize, std::optional NewFileInitialSize, uint32_t UserHeaderSize, - std::shared_ptr Logger, function_ref UserHeaderInit) { return createStringError(make_error_code(std::errc::not_supported), "OnDiskDataAllocator is not supported"); @@ -226,3 +227,8 @@ size_t OnDiskDataAllocator::size() const { return 0; } size_t OnDiskDataAllocator::capacity() const { return 0; } #endif // LLVM_ENABLE_ONDISK_CAS + +OnDiskDataAllocator::OnDiskDataAllocator(OnDiskDataAllocator &&RHS) = default; +OnDiskDataAllocator & +OnDiskDataAllocator::operator=(OnDiskDataAllocator &&RHS) = default; +OnDiskDataAllocator::~OnDiskDataAllocator() = default; diff --git a/llvm/lib/CAS/OnDiskGraphDB.cpp b/llvm/lib/CAS/OnDiskGraphDB.cpp index 6d1152c93f00e..2592cbfc65641 100644 --- a/llvm/lib/CAS/OnDiskGraphDB.cpp +++ b/llvm/lib/CAS/OnDiskGraphDB.cpp @@ -336,6 +336,8 @@ struct DataRecordHandle { return DataRecordHandle( *reinterpret_cast(Mem)); } + static Expected + getFromDataPool(const OnDiskDataAllocator &Pool, FileOffset Offset); explicit operator bool() const { return H; } const Header &getHeader() const { return *H; } @@ -645,6 +647,22 @@ DataRecordHandle DataRecordHandle::construct(char *Mem, const Input &I) { return constructImpl(Mem, I, Layout(I)); } +Expected +DataRecordHandle::getFromDataPool(const OnDiskDataAllocator &Pool, + FileOffset Offset) { + auto HeaderData = Pool.get(Offset, sizeof(DataRecordHandle::Header)); + if (!HeaderData) + return HeaderData.takeError(); + + auto Record = DataRecordHandle::get(HeaderData->data()); + if (Record.getTotalSize() + Offset.get() > Pool.size()) + return createStringError( + make_error_code(std::errc::illegal_byte_sequence), + "data record span passed the end of the data pool"); + + return Record; +} + DataRecordHandle DataRecordHandle::constructImpl(char *Mem, const Input &I, const Layout &L) { char *Next = Mem + sizeof(Header); @@ -973,16 +991,17 @@ Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const { case TrieRecord::StorageKind::Unknown: llvm_unreachable("already handled"); case TrieRecord::StorageKind::DataPool: { - auto DataRecord = DataRecordHandle::get(DataPool.beginData(D.Offset)); - if (DataRecord.getTotalSize() + D.Offset.get() > DataPool.size()) - return dataError("data record span passed the end of the data pool"); - for (auto InternRef : DataRecord.getRefs()) { + auto DataRecord = DataRecordHandle::getFromDataPool(DataPool, D.Offset); + if (!DataRecord) + return dataError(toString(DataRecord.takeError())); + + for (auto InternRef : DataRecord->getRefs()) { auto Index = getIndexProxyFromRef(InternRef); if (!Index) return Index.takeError(); Refs.push_back(Index->Hash); } - StoredData = DataRecord.getData(); + StoredData = DataRecord->getData(); break; } case TrieRecord::StorageKind::Standalone: { @@ -1068,11 +1087,15 @@ void OnDiskGraphDB::print(raw_ostream &OS) const { Pool, [](PoolInfo LHS, PoolInfo RHS) { return LHS.Offset < RHS.Offset; }); for (PoolInfo PI : Pool) { OS << "- addr=" << (void *)PI.Offset << " "; - DataRecordHandle D = - DataRecordHandle::get(DataPool.beginData(FileOffset(PI.Offset))); - OS << "record refs=" << D.getNumRefs() << " data=" << D.getDataSize() - << " size=" << D.getTotalSize() - << " end=" << (void *)(PI.Offset + D.getTotalSize()) << "\n"; + auto D = DataRecordHandle::getFromDataPool(DataPool, FileOffset(PI.Offset)); + if (!D) { + OS << "error: " << toString(D.takeError()); + return; + } + + OS << "record refs=" << D->getNumRefs() << " data=" << D->getDataSize() + << " size=" << D->getTotalSize() + << " end=" << (void *)(PI.Offset + D->getTotalSize()) << "\n"; } } @@ -1094,7 +1117,7 @@ OnDiskGraphDB::indexHash(ArrayRef Hash) { } OnDiskGraphDB::IndexProxy OnDiskGraphDB::getIndexProxyFromPointer( - OnDiskTrieRawHashMap::const_pointer P) const { + OnDiskTrieRawHashMap::ConstOnDiskPtr P) const { assert(P); assert(P.getOffset()); return IndexProxy{P.getOffset(), P->Hash, @@ -1131,7 +1154,7 @@ OnDiskGraphDB::getExistingReference(ArrayRef Digest) { return getExternalReference(*I); }; - OnDiskTrieRawHashMap::const_pointer P = Index.find(Digest); + OnDiskTrieRawHashMap::ConstOnDiskPtr P = Index.find(Digest); if (!P) return tryUpstream(std::nullopt); IndexProxy I = getIndexProxyFromPointer(P); @@ -1289,8 +1312,8 @@ OnDiskContent OnDiskGraphDB::getContentFromHandle(ObjectHandle OH) const { if (Handle.SDIM) return Handle.SDIM->getContent(); - auto DataHandle = - DataRecordHandle::get(DataPool.beginData(Handle.getAsFileOffset())); + auto DataHandle = cantFail( + DataRecordHandle::getFromDataPool(DataPool, Handle.getAsFileOffset())); assert(DataHandle.getData().end()[0] == 0 && "Null termination"); return OnDiskContent{DataHandle, std::nullopt}; } diff --git a/llvm/lib/CAS/OnDiskKeyValueDB.cpp b/llvm/lib/CAS/OnDiskKeyValueDB.cpp index 041c1a17adc0e..16dda5a5bd9fb 100644 --- a/llvm/lib/CAS/OnDiskKeyValueDB.cpp +++ b/llvm/lib/CAS/OnDiskKeyValueDB.cpp @@ -42,7 +42,7 @@ Expected> OnDiskKeyValueDB::put(ArrayRef Key, Expected>> OnDiskKeyValueDB::get(ArrayRef Key) { // Check the result cache. - OnDiskTrieRawHashMap::const_pointer ActionP = Cache.find(Key); + OnDiskTrieRawHashMap::ConstOnDiskPtr ActionP = Cache.find(Key); if (!ActionP) return std::nullopt; assert(isAddrAligned(Align(8), ActionP->Data.data())); diff --git a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp index 751e27f0cc132..3f9782392c13c 100644 --- a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp +++ b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp @@ -465,7 +465,7 @@ TrieRawHashMapHandle::createRecord(MappedFileRegionArena &Alloc, return Record; } -Expected +Expected OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const { // Check alignment. if (!isAligned(MappedFileRegionArena::getAlign(), Offset.get())) @@ -486,17 +486,17 @@ OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const { // Looks okay... TrieRawHashMapHandle::RecordData D = Impl->Trie.getRecord(SubtrieSlotValue::getDataOffset(Offset)); - return const_pointer(D.Proxy, D.getFileOffset()); + return ConstOnDiskPtr(D.Proxy, D.getFileOffset()); } -OnDiskTrieRawHashMap::const_pointer +OnDiskTrieRawHashMap::ConstOnDiskPtr OnDiskTrieRawHashMap::find(ArrayRef Hash) const { TrieRawHashMapHandle Trie = Impl->Trie; assert(Hash.size() == Trie.getNumHashBytes() && "Invalid hash"); SubtrieHandle S = Trie.getRoot(); if (!S) - return const_pointer(); + return ConstOnDiskPtr(); TrieHashIndexGenerator IndexGen = Trie.getIndexGen(S, Hash); size_t Index = IndexGen.next(); @@ -504,13 +504,13 @@ OnDiskTrieRawHashMap::find(ArrayRef Hash) const { // Try to set the content. SubtrieSlotValue V = S.load(Index); if (!V) - return const_pointer(); + return ConstOnDiskPtr(); // Check for an exact match. if (V.isData()) { TrieRawHashMapHandle::RecordData D = Trie.getRecord(V); - return D.Proxy.Hash == Hash ? const_pointer(D.Proxy, D.getFileOffset()) - : const_pointer(); + return D.Proxy.Hash == Hash ? ConstOnDiskPtr(D.Proxy, D.getFileOffset()) + : ConstOnDiskPtr(); } Index = IndexGen.next(); @@ -528,7 +528,7 @@ void SubtrieHandle::reinitialize(uint32_t StartBit, uint32_t NumBits) { H->NumBits = NumBits; } -Expected +Expected OnDiskTrieRawHashMap::insertLazy(ArrayRef Hash, LazyInsertOnConstructCB OnConstruct, LazyInsertOnLeakCB OnLeak) { @@ -561,7 +561,8 @@ OnDiskTrieRawHashMap::insertLazy(ArrayRef Hash, } if (S->compare_exchange_strong(Index, Existing, NewRecord->Offset)) - return pointer(NewRecord->Proxy, NewRecord->Offset.asDataFileOffset()); + return OnDiskPtr(NewRecord->Proxy, + NewRecord->Offset.asDataFileOffset()); // Race means that Existing is no longer empty; fall through... } @@ -578,8 +579,8 @@ OnDiskTrieRawHashMap::insertLazy(ArrayRef Hash, if (NewRecord && OnLeak) OnLeak(NewRecord->Offset.asDataFileOffset(), NewRecord->Proxy, ExistingRecord.Offset.asDataFileOffset(), ExistingRecord.Proxy); - return pointer(ExistingRecord.Proxy, - ExistingRecord.Offset.asDataFileOffset()); + return OnDiskPtr(ExistingRecord.Proxy, + ExistingRecord.Offset.asDataFileOffset()); } // Sink the existing content as long as the indexes match. diff --git a/llvm/unittests/CAS/OnDiskDataAllocatorTest.cpp b/llvm/unittests/CAS/OnDiskDataAllocatorTest.cpp index 38ef36afffbc5..299a608e842b2 100644 --- a/llvm/unittests/CAS/OnDiskDataAllocatorTest.cpp +++ b/llvm/unittests/CAS/OnDiskDataAllocatorTest.cpp @@ -32,7 +32,7 @@ TEST(OnDiskDataAllocatorTest, Allocate) { // Allocate. { for (size_t Size = 1; Size < 16; ++Size) { - OnDiskDataAllocator::pointer P; + OnDiskDataAllocator::OnDiskPtr P; ASSERT_THAT_ERROR(Allocator->allocate(Size).moveInto(P), Succeeded()); ASSERT_TRUE( isAligned(MappedFileRegionArena::getAlign(), P.getOffset().get())); @@ -41,7 +41,7 @@ TEST(OnDiskDataAllocatorTest, Allocate) { // Out of space. { - OnDiskDataAllocator::pointer P; + OnDiskDataAllocator::OnDiskPtr P; ASSERT_THAT_ERROR(Allocator->allocate(MB).moveInto(P), Failed()); } diff --git a/llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp b/llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp index 2c4a9117ec5fb..8465a71019f90 100644 --- a/llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp +++ b/llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp @@ -71,7 +71,7 @@ TEST_P(OnDiskTrieRawHashMapTestFixture, General) { std::optional Offset; std::optional> Data; { - std::optional Insertion; + std::optional Insertion; ASSERT_THAT_ERROR(Trie1->insert({Hash0, Data0v1}).moveInto(Insertion), Succeeded()); EXPECT_EQ(Hash0, (*Insertion)->Hash); @@ -128,7 +128,7 @@ TEST_P(OnDiskTrieRawHashMapTestFixture, General) { // Recover from an offset. { - OnDiskTrieRawHashMap::const_pointer Recovered; + OnDiskTrieRawHashMap::ConstOnDiskPtr Recovered; ASSERT_THAT_ERROR(Trie1->recoverFromFileOffset(*Offset).moveInto(Recovered), Succeeded()); ASSERT_TRUE(Recovered); @@ -140,14 +140,14 @@ TEST_P(OnDiskTrieRawHashMapTestFixture, General) { // Recover from a bad offset. { FileOffset BadOffset(1); - OnDiskTrieRawHashMap::const_pointer Recovered; + OnDiskTrieRawHashMap::ConstOnDiskPtr Recovered; ASSERT_THAT_ERROR( Trie1->recoverFromFileOffset(BadOffset).moveInto(Recovered), Failed()); } // Insert another thing. { - std::optional Insertion; + std::optional Insertion; ASSERT_THAT_ERROR(Trie1->insert({Hash1, Data1}).moveInto(Insertion), Succeeded()); EXPECT_EQ(Hash1, (*Insertion)->Hash); @@ -210,7 +210,7 @@ TEST(OnDiskTrieRawHashMapTest, OutOfSpace) { auto Hash0 = ArrayRef(Hash0Bytes); constexpr StringLiteral Data0v1Bytes = "data0.v1"; ArrayRef Data0v1 = ArrayRef(Data0v1Bytes.data(), Data0v1Bytes.size()); - std::optional Insertion; + std::optional Insertion; ASSERT_THAT_ERROR(Trie->insert({Hash0, Data0v1}).moveInto(Insertion), Failed()); }