From fe479f35c2445d432fe843e2347fe3048bcf80a8 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Mon, 6 Oct 2025 09:30:47 -0700 Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?= =?UTF-8?q?l=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.7 --- llvm/include/llvm/CAS/OnDiskDataAllocator.h | 95 +++++++ llvm/lib/CAS/CMakeLists.txt | 1 + llvm/lib/CAS/OnDiskDataAllocator.cpp | 234 ++++++++++++++++++ llvm/unittests/CAS/CMakeLists.txt | 1 + .../unittests/CAS/OnDiskDataAllocatorTest.cpp | 66 +++++ 5 files changed, 397 insertions(+) create mode 100644 llvm/include/llvm/CAS/OnDiskDataAllocator.h create mode 100644 llvm/lib/CAS/OnDiskDataAllocator.cpp create mode 100644 llvm/unittests/CAS/OnDiskDataAllocatorTest.cpp diff --git a/llvm/include/llvm/CAS/OnDiskDataAllocator.h b/llvm/include/llvm/CAS/OnDiskDataAllocator.h new file mode 100644 index 0000000000000..2809df800621b --- /dev/null +++ b/llvm/include/llvm/CAS/OnDiskDataAllocator.h @@ -0,0 +1,95 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file declares interface for OnDiskDataAllocator, a file backed data +/// pool can be used to allocate space to store data packed in a single file. It +/// is based on MappedFileRegionArena and includes a header in the beginning to +/// provide metadata. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CAS_ONDISKDATAALLOCATOR_H +#define LLVM_CAS_ONDISKDATAALLOCATOR_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/CAS/FileOffset.h" +#include "llvm/Support/Error.h" + +namespace llvm::cas { + +/// Sink for data. Stores variable length data with 8-byte alignment. Does not +/// track size of data, which is assumed to known from context, or embedded. +/// Uses 0-padding but does not guarantee 0-termination. +class OnDiskDataAllocator { +public: + using ValueProxy = MutableArrayRef; + + /// A pointer to data stored on disk. + class OnDiskPtr { + public: + FileOffset getOffset() const { return Offset; } + explicit operator bool() const { return bool(getOffset()); } + const ValueProxy &operator*() const { + assert(Offset && "Null dereference"); + return Value; + } + const ValueProxy *operator->() const { + assert(Offset && "Null dereference"); + return &Value; + } + + OnDiskPtr() = default; + + private: + friend class OnDiskDataAllocator; + OnDiskPtr(FileOffset Offset, ValueProxy Value) + : Offset(Offset), Value(Value) {} + FileOffset Offset; + ValueProxy Value; + }; + + /// Get the data of \p Size stored at the given \p Offset. Note the allocator + /// doesn't keep track of the allocation size, thus \p Size doesn't need to + /// match the size of allocation but needs to be smaller. + Expected> get(FileOffset Offset, size_t Size) const; + + /// Allocate at least \p Size with 8-byte alignment. + Expected allocate(size_t Size); + + /// \returns the buffer that was allocated at \p create time, with size + /// \p UserHeaderSize. + MutableArrayRef getUserHeader(); + + size_t size() const; + size_t capacity() const; + + static Expected + create(const Twine &Path, const Twine &TableName, uint64_t MaxFileSize, + std::optional NewFileInitialSize, + uint32_t UserHeaderSize = 0, + function_ref UserHeaderInit = nullptr); + + OnDiskDataAllocator(OnDiskDataAllocator &&RHS); + OnDiskDataAllocator &operator=(OnDiskDataAllocator &&RHS); + + // No copy. Just call \a create() again. + OnDiskDataAllocator(const OnDiskDataAllocator &) = delete; + OnDiskDataAllocator &operator=(const OnDiskDataAllocator &) = delete; + + ~OnDiskDataAllocator(); + +private: + struct ImplType; + explicit OnDiskDataAllocator(std::unique_ptr Impl); + std::unique_ptr Impl; +}; + +} // namespace llvm::cas + +#endif // LLVM_CAS_ONDISKDATAALLOCATOR_H diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt index 7ae5f7e46418e..bca39b645af45 100644 --- a/llvm/lib/CAS/CMakeLists.txt +++ b/llvm/lib/CAS/CMakeLists.txt @@ -7,6 +7,7 @@ add_llvm_component_library(LLVMCAS MappedFileRegionArena.cpp ObjectStore.cpp OnDiskCommon.cpp + OnDiskDataAllocator.cpp OnDiskTrieRawHashMap.cpp ADDITIONAL_HEADER_DIRS diff --git a/llvm/lib/CAS/OnDiskDataAllocator.cpp b/llvm/lib/CAS/OnDiskDataAllocator.cpp new file mode 100644 index 0000000000000..13bbd66139178 --- /dev/null +++ b/llvm/lib/CAS/OnDiskDataAllocator.cpp @@ -0,0 +1,234 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file Implements OnDiskDataAllocator. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/OnDiskDataAllocator.h" +#include "DatabaseFile.h" +#include "llvm/Config/llvm-config.h" + +using namespace llvm; +using namespace llvm::cas; +using namespace llvm::cas::ondisk; + +#if LLVM_ENABLE_ONDISK_CAS + +//===----------------------------------------------------------------------===// +// DataAllocator data structures. +//===----------------------------------------------------------------------===// + +namespace { +/// DataAllocator table layout: +/// - [8-bytes: Generic table header] +/// - 8-bytes: AllocatorOffset (reserved for implementing free lists) +/// - 8-bytes: Size for user data header +/// - +/// +/// Record layout: +/// - +class DataAllocatorHandle { +public: + static constexpr TableHandle::TableKind Kind = + TableHandle::TableKind::DataAllocator; + + struct Header { + TableHandle::Header GenericHeader; + std::atomic AllocatorOffset; + const uint64_t UserHeaderSize; + }; + + operator TableHandle() const { + if (!H) + return TableHandle(); + return TableHandle(*Region, H->GenericHeader); + } + + Expected> allocate(MappedFileRegionArena &Alloc, + size_t DataSize) { + assert(&Alloc.getRegion() == Region); + auto Ptr = Alloc.allocate(DataSize); + if (LLVM_UNLIKELY(!Ptr)) + return Ptr.takeError(); + return MutableArrayRef(*Ptr, DataSize); + } + + explicit operator bool() const { return H; } + const Header &getHeader() const { return *H; } + MappedFileRegion &getRegion() const { return *Region; } + + MutableArrayRef getUserHeader() { + return MutableArrayRef(reinterpret_cast(H + 1), + H->UserHeaderSize); + } + + static Expected + create(MappedFileRegionArena &Alloc, StringRef Name, uint32_t UserHeaderSize); + + DataAllocatorHandle() = default; + DataAllocatorHandle(MappedFileRegion &Region, Header &H) + : Region(&Region), H(&H) {} + DataAllocatorHandle(MappedFileRegion &Region, intptr_t HeaderOffset) + : DataAllocatorHandle( + Region, *reinterpret_cast
(Region.data() + HeaderOffset)) { + } + +private: + MappedFileRegion *Region = nullptr; + Header *H = nullptr; +}; + +} // end anonymous namespace + +struct OnDiskDataAllocator::ImplType { + DatabaseFile File; + DataAllocatorHandle Store; +}; + +Expected +DataAllocatorHandle::create(MappedFileRegionArena &Alloc, StringRef Name, + uint32_t UserHeaderSize) { + // Allocate. + auto Offset = + Alloc.allocateOffset(sizeof(Header) + UserHeaderSize + Name.size() + 1); + if (LLVM_UNLIKELY(!Offset)) + return Offset.takeError(); + + // Construct the header and the name. + assert(Name.size() <= UINT16_MAX && "Expected smaller table name"); + auto *H = new (Alloc.getRegion().data() + *Offset) + Header{{TableHandle::TableKind::DataAllocator, + static_cast(Name.size()), + static_cast(sizeof(Header) + UserHeaderSize)}, + /*AllocatorOffset=*/{0}, + /*UserHeaderSize=*/UserHeaderSize}; + // Memset UserHeader. + char *UserHeader = reinterpret_cast(H + 1); + memset(UserHeader, 0, UserHeaderSize); + // Write database file name (null-terminated). + char *NameStorage = UserHeader + UserHeaderSize; + llvm::copy(Name, NameStorage); + NameStorage[Name.size()] = 0; + return DataAllocatorHandle(Alloc.getRegion(), *H); +} + +Expected OnDiskDataAllocator::create( + const Twine &PathTwine, const Twine &TableNameTwine, uint64_t MaxFileSize, + std::optional NewFileInitialSize, uint32_t UserHeaderSize, + function_ref UserHeaderInit) { + assert(!UserHeaderSize || UserHeaderInit); + SmallString<128> PathStorage; + StringRef Path = PathTwine.toStringRef(PathStorage); + SmallString<128> TableNameStorage; + StringRef TableName = TableNameTwine.toStringRef(TableNameStorage); + + // Constructor for if the file doesn't exist. + auto NewDBConstructor = [&](DatabaseFile &DB) -> Error { + auto Store = + DataAllocatorHandle::create(DB.getAlloc(), TableName, UserHeaderSize); + if (LLVM_UNLIKELY(!Store)) + return Store.takeError(); + + if (auto E = DB.addTable(*Store)) + return E; + + if (UserHeaderSize) + UserHeaderInit(Store->getUserHeader().data()); + return Error::success(); + }; + + // Get or create the file. + Expected File = + DatabaseFile::create(Path, MaxFileSize, NewDBConstructor); + if (!File) + return File.takeError(); + + // Find the table and validate it. + std::optional Table = File->findTable(TableName); + if (!Table) + return createTableConfigError(std::errc::argument_out_of_domain, Path, + TableName, "table not found"); + if (Error E = checkTable("table kind", (size_t)DataAllocatorHandle::Kind, + (size_t)Table->getHeader().Kind, Path, TableName)) + return std::move(E); + auto Store = Table->cast(); + assert(Store && "Already checked the kind"); + + // Success. + OnDiskDataAllocator::ImplType Impl{DatabaseFile(std::move(*File)), Store}; + return OnDiskDataAllocator(std::make_unique(std::move(Impl))); +} + +Expected +OnDiskDataAllocator::allocate(size_t Size) { + auto Data = Impl->Store.allocate(Impl->File.getAlloc(), Size); + if (LLVM_UNLIKELY(!Data)) + return Data.takeError(); + + return OnDiskPtr(FileOffset(Data->data() - Impl->Store.getRegion().data()), + *Data); +} + +Expected> OnDiskDataAllocator::get(FileOffset Offset, + size_t Size) const { + assert(Offset); + assert(Impl); + if (Offset.get() + Size >= Impl->File.getAlloc().size()) + return createStringError(make_error_code(std::errc::protocol_error), + "requested size too large in allocator"); + return ArrayRef{Impl->File.getRegion().data() + Offset.get(), Size}; +} + +MutableArrayRef OnDiskDataAllocator::getUserHeader() { + return Impl->Store.getUserHeader(); +} + +size_t OnDiskDataAllocator::size() const { return Impl->File.size(); } +size_t OnDiskDataAllocator::capacity() const { + return Impl->File.getRegion().size(); +} + +OnDiskDataAllocator::OnDiskDataAllocator(std::unique_ptr Impl) + : Impl(std::move(Impl)) {} + +#else // !LLVM_ENABLE_ONDISK_CAS + +struct OnDiskDataAllocator::ImplType {}; + +Expected OnDiskDataAllocator::create( + const Twine &Path, const Twine &TableName, uint64_t MaxFileSize, + std::optional NewFileInitialSize, uint32_t UserHeaderSize, + function_ref UserHeaderInit) { + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskDataAllocator is not supported"); +} + +Expected +OnDiskDataAllocator::allocate(size_t Size) { + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskDataAllocator is not supported"); +} + +Expected> OnDiskDataAllocator::get(FileOffset Offset, + size_t Size) const { + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskDataAllocator is not supported"); +} + +MutableArrayRef OnDiskDataAllocator::getUserHeader() { return {}; } + +size_t OnDiskDataAllocator::size() const { return 0; } +size_t OnDiskDataAllocator::capacity() const { return 0; } + +#endif // LLVM_ENABLE_ONDISK_CAS + +OnDiskDataAllocator::OnDiskDataAllocator(OnDiskDataAllocator &&RHS) = default; +OnDiskDataAllocator & +OnDiskDataAllocator::operator=(OnDiskDataAllocator &&RHS) = default; +OnDiskDataAllocator::~OnDiskDataAllocator() = default; diff --git a/llvm/unittests/CAS/CMakeLists.txt b/llvm/unittests/CAS/CMakeLists.txt index 0f8fcb9e98954..ee40e6c9879a1 100644 --- a/llvm/unittests/CAS/CMakeLists.txt +++ b/llvm/unittests/CAS/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_unittest(CASTests ActionCacheTest.cpp CASTestConfig.cpp ObjectStoreTest.cpp + OnDiskDataAllocatorTest.cpp OnDiskTrieRawHashMapTest.cpp ProgramTest.cpp ) diff --git a/llvm/unittests/CAS/OnDiskDataAllocatorTest.cpp b/llvm/unittests/CAS/OnDiskDataAllocatorTest.cpp new file mode 100644 index 0000000000000..966fa03076841 --- /dev/null +++ b/llvm/unittests/CAS/OnDiskDataAllocatorTest.cpp @@ -0,0 +1,66 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/OnDiskDataAllocator.h" +#include "llvm/CAS/MappedFileRegionArena.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Testing/Support/Error.h" +#include "llvm/Testing/Support/SupportHelpers.h" + +#if LLVM_ENABLE_ONDISK_CAS + +using namespace llvm; +using namespace llvm::cas; + +TEST(OnDiskDataAllocatorTest, Allocate) { + unittest::TempDir Temp("data-allocator", /*Unique=*/true); + constexpr size_t MB = 1024u * 1024u; + + std::optional Allocator; + ASSERT_THAT_ERROR(OnDiskDataAllocator::create( + Temp.path("allocator"), "data", /*MaxFileSize=*/MB, + /*NewFileInitialSize=*/std::nullopt) + .moveInto(Allocator), + Succeeded()); + + // Allocate. + { + for (size_t Size = 1; Size < 16; ++Size) { + OnDiskDataAllocator::OnDiskPtr P; + ASSERT_THAT_ERROR(Allocator->allocate(Size).moveInto(P), Succeeded()); + EXPECT_TRUE( + isAligned(MappedFileRegionArena::getAlign(), P.getOffset().get())); + } + } + + // Out of space. + { + OnDiskDataAllocator::OnDiskPtr P; + ASSERT_THAT_ERROR(Allocator->allocate(MB).moveInto(P), Failed()); + } + + // Check size and capacity. + { + ASSERT_EQ(Allocator->capacity(), MB); + ASSERT_LE(Allocator->size(), MB); + } + + // Get. + { + OnDiskDataAllocator::OnDiskPtr P; + ASSERT_THAT_ERROR(Allocator->allocate(32).moveInto(P), Succeeded()); + ArrayRef Data; + ASSERT_THAT_ERROR(Allocator->get(P.getOffset(), 16).moveInto(Data), + Succeeded()); + ASSERT_THAT_ERROR(Allocator->get(P.getOffset(), 1025).moveInto(Data), + Failed()); + } +} + +#endif // LLVM_ENABLE_ONDISK_CAS