1,548 changes: 1,548 additions & 0 deletions llvm/lib/CAS/OnDiskGraphDB.cpp

Large diffs are not rendered by default.

1,352 changes: 1,352 additions & 0 deletions llvm/lib/CAS/OnDiskHashMappedTrie.cpp

Large diffs are not rendered by default.

79 changes: 79 additions & 0 deletions llvm/lib/CAS/OnDiskKeyValueDB.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
//===- OnDiskKeyValueDB.cpp -------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/CAS/OnDiskKeyValueDB.h"
#include "OnDiskCommon.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"

using namespace llvm;
using namespace llvm::cas;
using namespace llvm::cas::ondisk;

// Base name of the file that backs the key-value table. OnDiskKeyValueDB::open
// joins FilePrefix and this name and appends the result to the database
// directory path.
static constexpr StringLiteral ActionCacheFile = "actions";
// Prefix placed in front of ActionCacheFile when forming the file name.
// NOTE(review): presumably an on-disk format version tag -- confirm.
static constexpr StringLiteral FilePrefix = "v1.";

/// Associate \p Value with \p Key in the backing trie.
///
/// \p Value must be exactly ValueSize bytes long; any other length is
/// rejected with an errc::invalid_argument error. The bytes are copied into
/// the trie's storage by the callback handed to insertLazy.
/// NOTE(review): the name insertLazy suggests the callback only runs when a
/// new entry is created for \p Key -- confirm against OnDiskHashMappedTrie.
///
/// \returns the data held by the entry that insertLazy returned for \p Key.
Expected<ArrayRef<char>> OnDiskKeyValueDB::put(ArrayRef<uint8_t> Key,
                                               ArrayRef<char> Value) {
  const bool HasExpectedSize = Value.size() == ValueSize;
  if (LLVM_UNLIKELY(!HasExpectedSize))
    return createStringError(errc::invalid_argument,
                             "expected value size of " + itostr(ValueSize) +
                                 ", got: " + itostr(Value.size()));
  assert(Value.size() == ValueSize);

  // Fills the tentative slot with the caller's bytes.
  auto FillSlot = [&](FileOffset TentativeOffset,
                      OnDiskHashMappedTrie::ValueProxy TentativeValue) {
    assert(TentativeValue.Data.size() == ValueSize);
    llvm::copy(Value, TentativeValue.Data.data());
  };

  OnDiskHashMappedTrie::pointer Entry = Cache.insertLazy(Key, FillSlot);
  return Entry->Data;
}

/// Look up the value stored for \p Key.
///
/// \returns std::nullopt when the trie has no entry for \p Key, otherwise the
/// entry's data. The data pointer is asserted to be 8-byte aligned.
Expected<std::optional<ArrayRef<char>>>
OnDiskKeyValueDB::get(ArrayRef<uint8_t> Key) {
  OnDiskHashMappedTrie::const_pointer Entry = Cache.find(Key);
  if (Entry) {
    assert(isAddrAligned(Align(8), Entry->Data.data()));
    return Entry->Data;
  }
  // No entry recorded for this key.
  return std::nullopt;
}

/// Open (creating it if necessary) the key-value database in directory
/// \p Path.
///
/// \param Path directory holding the database; created if missing.
/// \param HashName name of the key hash, embedded in the table name.
/// \param KeySize key size in bytes (passed to the trie as KeySize * 8).
/// \param ValueName name of the value kind, embedded in the table name.
/// \param ValueSize size in bytes of every stored value.
///
/// The backing file may grow up to 1 GiB unless
/// getOverriddenMaxMappingSize() supplies a different limit; the minimum
/// file size handed to the trie is 1 MiB.
///
/// \returns the opened database, or the error from directory creation, from
/// reading the size override, or from creating the trie.
Expected<std::unique_ptr<OnDiskKeyValueDB>>
OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize,
                       StringRef ValueName, size_t ValueSize) {
  // The directory has to exist before the table file can be created in it.
  std::error_code DirEC = sys::fs::create_directories(Path);
  if (DirEC)
    return createFileError(Path, DirEC);

  SmallString<256> TablePath(Path);
  sys::path::append(TablePath, FilePrefix + ActionCacheFile);

  constexpr uint64_t MB = 1024ull * 1024ull;
  constexpr uint64_t GB = 1024ull * MB;

  // Honour an explicit mapping-size override when one is provided.
  auto Override = getOverriddenMaxMappingSize();
  if (!Override)
    return Override.takeError();
  const uint64_t MaxFileSize = *Override ? **Override : GB;

  std::optional<OnDiskHashMappedTrie> Table;
  Error CreateErr =
      OnDiskHashMappedTrie::create(
          TablePath, "llvm.actioncache[" + HashName + "->" + ValueName + "]",
          KeySize * 8,
          /*DataSize=*/ValueSize, MaxFileSize, /*MinFileSize=*/MB)
          .moveInto(Table);
  if (CreateErr)
    return std::move(CreateErr);

  return std::unique_ptr<OnDiskKeyValueDB>(
      new OnDiskKeyValueDB(ValueSize, std::move(*Table)));
}
1 change: 1 addition & 0 deletions llvm/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ add_subdirectory(FileCheck)
add_subdirectory(InterfaceStub)
add_subdirectory(IRPrinter)
add_subdirectory(IRReader)
add_subdirectory(CAS)
add_subdirectory(CGData)
add_subdirectory(CodeGen)
add_subdirectory(CodeGenTypes)
Expand Down
11 changes: 7 additions & 4 deletions llvm/lib/Support/Unix/Path.inc
Original file line number Diff line number Diff line change
Expand Up @@ -1223,13 +1223,14 @@ Expected<size_t> readNativeFileSlice(file_t FD, MutableArrayRef<char> Buf,
return NumRead;
}

std::error_code tryLockFile(int FD, std::chrono::milliseconds Timeout) {
std::error_code tryLockFile(int FD, std::chrono::milliseconds Timeout,
bool Exclusive) {
auto Start = std::chrono::steady_clock::now();
auto End = Start + Timeout;
do {
struct flock Lock;
memset(&Lock, 0, sizeof(Lock));
Lock.l_type = F_WRLCK;
Lock.l_type = Exclusive ? F_WRLCK : F_RDLCK;
Lock.l_whence = SEEK_SET;
Lock.l_start = 0;
Lock.l_len = 0;
Expand All @@ -1238,15 +1239,17 @@ std::error_code tryLockFile(int FD, std::chrono::milliseconds Timeout) {
int Error = errno;
if (Error != EACCES && Error != EAGAIN)
return std::error_code(Error, std::generic_category());
if (Timeout.count() == 0)
break;
usleep(1000);
} while (std::chrono::steady_clock::now() < End);
return make_error_code(errc::no_lock_available);
}

std::error_code lockFile(int FD) {
std::error_code lockFile(int FD, bool Exclusive) {
struct flock Lock;
memset(&Lock, 0, sizeof(Lock));
Lock.l_type = F_WRLCK;
Lock.l_type = Exclusive ? F_WRLCK : F_RDLCK;
Lock.l_whence = SEEK_SET;
Lock.l_start = 0;
Lock.l_len = 0;
Expand Down
12 changes: 8 additions & 4 deletions llvm/lib/Support/Windows/Path.inc
Original file line number Diff line number Diff line change
Expand Up @@ -1327,8 +1327,10 @@ Expected<size_t> readNativeFileSlice(file_t FileHandle,
return readNativeFileImpl(FileHandle, Buf, &Overlapped);
}

std::error_code tryLockFile(int FD, std::chrono::milliseconds Timeout) {
DWORD Flags = LOCKFILE_EXCLUSIVE_LOCK | LOCKFILE_FAIL_IMMEDIATELY;
std::error_code tryLockFile(int FD, std::chrono::milliseconds Timeout,
bool Exclusive) {
DWORD Flags = Exclusive ? LOCKFILE_EXCLUSIVE_LOCK : 0;
Flags |= LOCKFILE_FAIL_IMMEDIATELY;
OVERLAPPED OV = {};
file_t File = convertFDToNativeFile(FD);
auto Start = std::chrono::steady_clock::now();
Expand All @@ -1338,6 +1340,8 @@ std::error_code tryLockFile(int FD, std::chrono::milliseconds Timeout) {
return std::error_code();
DWORD Error = ::GetLastError();
if (Error == ERROR_LOCK_VIOLATION) {
if (Timeout.count() == 0)
break;
::Sleep(1);
continue;
}
Expand All @@ -1346,8 +1350,8 @@ std::error_code tryLockFile(int FD, std::chrono::milliseconds Timeout) {
return mapWindowsError(ERROR_LOCK_VIOLATION);
}

std::error_code lockFile(int FD) {
DWORD Flags = LOCKFILE_EXCLUSIVE_LOCK;
std::error_code lockFile(int FD, bool Exclusive) {
DWORD Flags = Exclusive ? LOCKFILE_EXCLUSIVE_LOCK : 0;
OVERLAPPED OV = {};
file_t File = convertFDToNativeFile(FD);
if (::LockFileEx(File, Flags, 0, MAXDWORD, MAXDWORD, &OV))
Expand Down
73 changes: 73 additions & 0 deletions llvm/unittests/CAS/ActionCacheTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
//===- ActionCacheTest.cpp ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/CAS/ActionCache.h"
#include "CASTestConfig.h"
#include "llvm/CAS/ObjectStore.h"
#include "llvm/Testing/Support/Error.h"
#include "gtest/gtest.h"

using namespace llvm;
using namespace llvm::cas;

// A value stored under a key can be read back and resolves to the same
// object in the CAS.
TEST_P(CASTest, ActionCacheHit) {
  std::shared_ptr<ObjectStore> Store = createObjectStore();
  std::unique_ptr<ActionCache> Actions = createActionCache();

  // Use a single object as both the key and the value.
  std::optional<ObjectProxy> Key;
  ASSERT_THAT_ERROR(Store->createProxy(std::nullopt, "1").moveInto(Key),
                    Succeeded());
  ASSERT_THAT_ERROR(Actions->put(*Key, *Key), Succeeded());

  std::optional<CASID> CachedID;
  ASSERT_THAT_ERROR(Actions->get(*Key).moveInto(CachedID), Succeeded());
  ASSERT_TRUE(CachedID);

  std::optional<ObjectRef> CachedRef = Store->getReference(*CachedID);
  ASSERT_TRUE(CachedRef);
  ASSERT_EQ(*Key, *CachedRef);
}

// Looking up a key that was never stored is a miss (not an error); the same
// lookup hits once the key has been stored.
TEST_P(CASTest, ActionCacheMiss) {
  std::shared_ptr<ObjectStore> Store = createObjectStore();
  std::unique_ptr<ActionCache> Actions = createActionCache();

  std::optional<ObjectProxy> First, Second;
  ASSERT_THAT_ERROR(Store->createProxy(std::nullopt, "1").moveInto(First),
                    Succeeded());
  ASSERT_THAT_ERROR(Store->createProxy(std::nullopt, "2").moveInto(Second),
                    Succeeded());
  ASSERT_THAT_ERROR(Actions->put(*First, *Second), Succeeded());

  // Cache miss: nothing has been stored under the second key yet.
  std::optional<CASID> Missing;
  ASSERT_THAT_ERROR(Actions->get(*Second).moveInto(Missing), Succeeded());
  ASSERT_FALSE(Missing);

  ASSERT_THAT_ERROR(Actions->put(*Second, *First), Succeeded());

  // Cache hit now that a value has been added for the second key.
  std::optional<CASID> Found;
  ASSERT_THAT_ERROR(Actions->get(*Second).moveInto(Found), Succeeded());
  ASSERT_TRUE(Found);

  std::optional<ObjectRef> FoundRef = Store->getReference(*Found);
  ASSERT_TRUE(FoundRef);
  ASSERT_EQ(*First, *FoundRef);
}

// Re-storing a key is only accepted when the value is unchanged.
TEST_P(CASTest, ActionCacheRewrite) {
  std::shared_ptr<ObjectStore> Store = createObjectStore();
  std::unique_ptr<ActionCache> Actions = createActionCache();

  std::optional<ObjectProxy> First, Second;
  ASSERT_THAT_ERROR(Store->createProxy(std::nullopt, "1").moveInto(First),
                    Succeeded());
  ASSERT_THAT_ERROR(Store->createProxy(std::nullopt, "2").moveInto(Second),
                    Succeeded());

  ASSERT_THAT_ERROR(Actions->put(*First, *First), Succeeded());
  // A different value for an existing key must be rejected.
  ASSERT_THAT_ERROR(Actions->put(*First, *Second), Failed());
  // Storing the identical value again under the same key is fine.
  ASSERT_THAT_ERROR(Actions->put(*First, *First), Succeeded());
}
23 changes: 23 additions & 0 deletions llvm/unittests/CAS/CASTestConfig.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
//===- CASTestConfig.cpp --------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "CASTestConfig.h"
#include "llvm/CAS/ObjectStore.h"
#include "gtest/gtest.h"

using namespace llvm;
using namespace llvm::cas;

/// Build a testing environment backed by a fresh in-memory object store and a
/// fresh in-memory action cache. The index argument \p I is not used.
CASTestingEnv createInMemory(int I) {
  CASTestingEnv Env;
  Env.CAS = createInMemoryCAS();
  Env.Cache = createInMemoryActionCache();
  return Env;
}

// Run every TEST_P(CASTest, ...) case with createInMemory as the environment
// factory, under the instantiation name "InMemoryCAS".
INSTANTIATE_TEST_SUITE_P(InMemoryCAS, CASTest,
::testing::Values(createInMemory));
38 changes: 38 additions & 0 deletions llvm/unittests/CAS/CASTestConfig.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
//===- CASTestConfig.h ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/CAS/ActionCache.h"
#include "llvm/CAS/ObjectStore.h"
#include "gtest/gtest.h"

#ifndef LLVM_UNITTESTS_CASTESTCONFIG_H
#define LLVM_UNITTESTS_CASTESTCONFIG_H

/// Bundle of the two objects a CAS test runs against, as produced by the
/// factory passed as the test parameter.
struct CASTestingEnv {
  /// Object store under test.
  std::unique_ptr<llvm::cas::ObjectStore> CAS;
  /// Action cache under test.
  std::unique_ptr<llvm::cas::ActionCache> Cache;
};

/// Parameterized fixture for CAS tests. The test parameter is a factory that
/// maps an integer index to a CASTestingEnv; each create*() helper calls the
/// factory with the next index and keeps only the requested half of the
/// resulting environment.
class CASTest
    : public testing::TestWithParam<std::function<CASTestingEnv(int)>> {
protected:
  /// Index handed to the factory; engaged only between SetUp() and
  /// TearDown().
  std::optional<int> NextCASIndex;

  /// Create a new environment and return its object store.
  std::unique_ptr<llvm::cas::ObjectStore> createObjectStore() {
    auto TD = GetParam()(++(*NextCASIndex));
    return std::move(TD.CAS);
  }

  /// Create a new environment and return its action cache.
  std::unique_ptr<llvm::cas::ActionCache> createActionCache() {
    auto TD = GetParam()(++(*NextCASIndex));
    return std::move(TD.Cache);
  }

  // `override` makes the compiler verify these really replace the gtest
  // hooks; without it a signature typo would silently leave NextCASIndex
  // disengaged when the helpers above dereference it.
  void SetUp() override { NextCASIndex = 0; }
  void TearDown() override { NextCASIndex = std::nullopt; }
};

#endif
21 changes: 21 additions & 0 deletions llvm/unittests/CAS/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Expose the on-disk CAS configuration to the test sources as a preprocessor
# definition so they can guard on-disk-only tests with
# `#if LLVM_ENABLE_ONDISK_CAS`.
if (LLVM_ENABLE_ONDISK_CAS)
add_definitions(-DLLVM_ENABLE_ONDISK_CAS=1)
endif()

# LLVM components the CAS unit tests link against.
set(LLVM_LINK_COMPONENTS
Support
CAS
TestingSupport
)

# All CAS unit tests are built into a single CASTests binary.
add_llvm_unittest(CASTests
ActionCacheTest.cpp
CASTestConfig.cpp
ObjectStoreTest.cpp
OnDiskGraphDBTest.cpp
OnDiskHashMappedTrieTest.cpp
OnDiskKeyValueDBTest.cpp
ProgramTest.cpp
)

# Link the testing support library explicitly as well.
target_link_libraries(CASTests PRIVATE LLVMTestingSupport)
360 changes: 360 additions & 0 deletions llvm/unittests/CAS/ObjectStoreTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,360 @@
//===- ObjectStoreTest.cpp ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/CAS/ObjectStore.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Testing/Support/Error.h"
#include "gtest/gtest.h"

#include "CASTestConfig.h"

using namespace llvm;
using namespace llvm::cas;

// Distinct objects print as distinct IDs, and a printed ID parses back to the
// ID it came from.
TEST_P(CASTest, PrintIDs) {
  std::unique_ptr<ObjectStore> Store = createObjectStore();

  std::optional<CASID> FirstID, SecondID;
  ASSERT_THAT_ERROR(Store->createProxy(std::nullopt, "1").moveInto(FirstID),
                    Succeeded());
  ASSERT_THAT_ERROR(Store->createProxy(std::nullopt, "2").moveInto(SecondID),
                    Succeeded());
  EXPECT_NE(FirstID, SecondID);

  const std::string FirstText = FirstID->toString();
  const std::string SecondText = SecondID->toString();
  EXPECT_NE(FirstText, SecondText);

  // Round-trip through the textual form.
  std::optional<CASID> FirstParsed, SecondParsed;
  ASSERT_THAT_ERROR(Store->parseID(FirstText).moveInto(FirstParsed),
                    Succeeded());
  ASSERT_THAT_ERROR(Store->parseID(SecondText).moveInto(SecondParsed),
                    Succeeded());
  EXPECT_EQ(FirstID, FirstParsed);
  EXPECT_EQ(SecondID, SecondParsed);
}

// Stores a few blobs of different sizes and checks that their IDs are stable,
// that they validate, that they can be read back repeatedly, and that a second
// CAS instance only knows them after they are inserted there too.
TEST_P(CASTest, Blobs) {
  std::unique_ptr<ObjectStore> CAS1 = createObjectStore();
  StringRef ContentStrings[] = {
      "word",
      "some longer text std::string's local memory",
      R"(multiline text multiline text multiline text multiline text
multiline text multiline text multiline text multiline text multiline text
multiline text multiline text multiline text multiline text multiline text
multiline text multiline text multiline text multiline text multiline text
multiline text multiline text multiline text multiline text multiline text
multiline text multiline text multiline text multiline text multiline text)",
  };

  SmallVector<CASID> IDs;
  for (StringRef Content : ContentStrings) {
    // The content is handed to the CAS as a StringRef; no temporary
    // std::string is created here.
    std::optional<ObjectProxy> Blob;
    ASSERT_THAT_ERROR(CAS1->createProxy(std::nullopt, Content).moveInto(Blob),
                      Succeeded());
    IDs.push_back(Blob->getID());

    // Check basic printing of IDs. As soon as there are two distinct blobs,
    // the first and the most recent one must print differently (this matches
    // the equivalent check in the LeafNodes test).
    EXPECT_EQ(IDs.back().toString(), IDs.back().toString());
    if (IDs.size() > 1)
      EXPECT_NE(IDs.front().toString(), IDs.back().toString());
  }

  // Check that the blobs give the same IDs later.
  for (int I = 0, E = IDs.size(); I != E; ++I) {
    std::optional<ObjectProxy> Blob;
    ASSERT_THAT_ERROR(
        CAS1->createProxy(std::nullopt, ContentStrings[I]).moveInto(Blob),
        Succeeded());
    EXPECT_EQ(IDs[I], Blob->getID());
  }

  // Run validation on all CASIDs.
  for (int I = 0, E = IDs.size(); I != E; ++I)
    ASSERT_THAT_ERROR(CAS1->validate(IDs[I]), Succeeded());

  // Check that the blobs can be retrieved multiple times.
  for (int I = 0, E = IDs.size(); I != E; ++I) {
    for (int J = 0, JE = 3; J != JE; ++J) {
      std::optional<ObjectProxy> Buffer;
      ASSERT_THAT_ERROR(CAS1->getProxy(IDs[I]).moveInto(Buffer), Succeeded());
      EXPECT_EQ(ContentStrings[I], Buffer->getData());
    }
  }

  // Confirm these blobs don't exist in a fresh CAS instance.
  std::unique_ptr<ObjectStore> CAS2 = createObjectStore();
  for (int I = 0, E = IDs.size(); I != E; ++I) {
    std::optional<ObjectProxy> Proxy;
    EXPECT_THAT_ERROR(CAS2->getProxy(IDs[I]).moveInto(Proxy), Failed());
  }

  // Insert into the second CAS (in reverse order) and confirm the IDs are
  // stable. Getting them should work now.
  for (int I = IDs.size(), E = 0; I != E; --I) {
    auto &ID = IDs[I - 1];
    auto &Content = ContentStrings[I - 1];
    std::optional<ObjectProxy> Blob;
    ASSERT_THAT_ERROR(CAS2->createProxy(std::nullopt, Content).moveInto(Blob),
                      Succeeded());
    EXPECT_EQ(ID, Blob->getID());

    std::optional<ObjectProxy> Buffer;
    ASSERT_THAT_ERROR(CAS2->getProxy(ID).moveInto(Buffer), Succeeded());
    EXPECT_EQ(Content, Buffer->getData());
  }
}

// Light validation that larger blobs work: grow two strings until the first
// reaches 1MB, then probe sizes right around the 1MB mark.
TEST_P(CASTest, BlobsBig) {
  std::unique_ptr<ObjectStore> Store = createObjectStore();
  SmallString<256> Growing = StringRef("a few words");
  SmallString<256> Other = StringRef("others");
  while (Growing.size() < 1024U * 1024U) {
    std::optional<CASID> FirstID;
    std::optional<CASID> SecondID;

    // Storing the same content twice must produce the same, valid ID.
    ASSERT_THAT_ERROR(
        Store->createProxy(std::nullopt, Growing).moveInto(FirstID),
        Succeeded());
    ASSERT_THAT_ERROR(
        Store->createProxy(std::nullopt, Growing).moveInto(SecondID),
        Succeeded());
    ASSERT_THAT_ERROR(Store->validate(*FirstID), Succeeded());
    ASSERT_THAT_ERROR(Store->validate(*SecondID), Succeeded());
    ASSERT_EQ(FirstID, SecondID);

    Growing.append(Other);

    // Same check for the other string, before it grows in turn.
    ASSERT_THAT_ERROR(Store->createProxy(std::nullopt, Other).moveInto(FirstID),
                      Succeeded());
    ASSERT_THAT_ERROR(
        Store->createProxy(std::nullopt, Other).moveInto(SecondID),
        Succeeded());
    ASSERT_THAT_ERROR(Store->validate(*FirstID), Succeeded());
    ASSERT_THAT_ERROR(Store->validate(*SecondID), Succeeded());
    ASSERT_EQ(FirstID, SecondID);

    Other.append(Growing);
  }

  // Specifically check near 1MB for objects large enough they're likely to be
  // stored externally in an on-disk CAS and will be near a page boundary.
  const size_t InterestingSize = 1024U * 1024ULL;
  const size_t SizeE = InterestingSize + 2;
  SmallString<0> Storage;
  // Storage starts out empty, so it always needs to be grown to SizeE.
  Storage.resize(SizeE, '\01');

  for (size_t Size = InterestingSize - 2; Size < SizeE; ++Size) {
    StringRef Data(Storage.data(), Size);
    std::optional<ObjectProxy> Blob;
    ASSERT_THAT_ERROR(Store->createProxy(std::nullopt, Data).moveInto(Blob),
                      Succeeded());
    ASSERT_EQ(Data, Blob->getData());
    // The byte just past the returned data is expected to be a null
    // terminator.
    ASSERT_EQ(0, Blob->getData().end()[0]);
  }
}

// Same scenario as the Blobs test, but going through ObjectStore::store() and
// ObjectRef handles for nodes that have no references.
TEST_P(CASTest, LeafNodes) {
  std::unique_ptr<ObjectStore> CAS1 = createObjectStore();
  StringRef ContentStrings[] = {
      "word",
      "some longer text std::string's local memory",
      R"(multiline text multiline text multiline text multiline text
multiline text multiline text multiline text multiline text multiline text
multiline text multiline text multiline text multiline text multiline text
multiline text multiline text multiline text multiline text multiline text
multiline text multiline text multiline text multiline text multiline text
multiline text multiline text multiline text multiline text multiline text)",
  };
  const size_t NumContents = std::size(ContentStrings);

  SmallVector<ObjectRef> Nodes;
  SmallVector<CASID> IDs;
  for (StringRef Content : ContentStrings) {
    // Store the content as a node without references.
    std::optional<ObjectRef> Stored;
    ASSERT_THAT_ERROR(
        CAS1->store(std::nullopt, arrayRefFromStringRef<char>(Content))
            .moveInto(Stored),
        Succeeded());
    Nodes.push_back(*Stored);

    // Basic self-consistency of refs, IDs and their printed form.
    IDs.push_back(CAS1->getID(*Stored));
    EXPECT_EQ(IDs.back().toString(), IDs.back().toString());
    EXPECT_EQ(Nodes.front(), Nodes.front());
    EXPECT_EQ(Nodes.back(), Nodes.back());
    EXPECT_EQ(IDs.front(), IDs.front());
    EXPECT_EQ(IDs.back(), IDs.back());

    // Once there is more than one node, first and last must differ.
    if (Nodes.size() > 1) {
      EXPECT_NE(Nodes.front(), Nodes.back());
      EXPECT_NE(IDs.front(), IDs.back());
    }
  }

  // Storing the same content again yields the same IDs.
  for (size_t Idx = 0; Idx < IDs.size(); ++Idx) {
    std::optional<ObjectRef> Stored;
    ASSERT_THAT_ERROR(
        CAS1->store(std::nullopt,
                    arrayRefFromStringRef<char>(ContentStrings[Idx]))
            .moveInto(Stored),
        Succeeded());
    EXPECT_EQ(IDs[Idx], CAS1->getID(*Stored));
  }

  // Each object can be retrieved several times in a row.
  for (size_t Idx = 0; Idx < IDs.size(); ++Idx) {
    for (unsigned Attempt = 0; Attempt < 3; ++Attempt) {
      std::optional<ObjectProxy> Loaded;
      ASSERT_THAT_ERROR(CAS1->getProxy(IDs[Idx]).moveInto(Loaded),
                        Succeeded());
      ASSERT_TRUE(Loaded);
      EXPECT_EQ(ContentStrings[Idx], Loaded->getData());
    }
  }

  // A fresh CAS instance does not know any of these objects.
  std::unique_ptr<ObjectStore> CAS2 = createObjectStore();
  for (size_t Idx = 0; Idx < IDs.size(); ++Idx) {
    std::optional<ObjectProxy> Loaded;
    EXPECT_THAT_ERROR(CAS2->getProxy(IDs[Idx]).moveInto(Loaded), Failed());
  }

  // Insert into the second CAS, last to first, and confirm that the IDs are
  // stable and the objects can now be retrieved.
  (void)NumContents;
  for (size_t Idx = IDs.size(); Idx-- > 0;) {
    auto &ID = IDs[Idx];
    auto &Content = ContentStrings[Idx];

    std::optional<ObjectRef> Stored;
    ASSERT_THAT_ERROR(
        CAS2->store(std::nullopt, arrayRefFromStringRef<char>(Content))
            .moveInto(Stored),
        Succeeded());
    EXPECT_EQ(ID, CAS2->getID(*Stored));

    std::optional<ObjectProxy> Loaded;
    ASSERT_THAT_ERROR(CAS2->getProxy(ID).moveInto(Loaded), Succeeded());
    ASSERT_TRUE(Loaded);
    EXPECT_EQ(Content, Loaded->getData());
  }
}

// Creates nodes with data sizes around 1MB, each referencing all previously
// created nodes, and validates every one of them at the end.
TEST_P(CASTest, NodesBig) {
  std::unique_ptr<ObjectStore> Store = createObjectStore();

  // Specifically check near 1MB for objects large enough they're likely to be
  // stored externally in an on-disk CAS, and such that one of them will be
  // near a page boundary.
  constexpr size_t InterestingSize = 1024U * 1024ULL;
  constexpr size_t WordSize = sizeof(void *);

  // Start much smaller to account for headers.
  constexpr size_t SizeB = InterestingSize - 8 * WordSize;
  constexpr size_t SizeE = InterestingSize + 1;

  SmallString<0> Storage;
  // Storage starts out empty, so it always needs to be grown to SizeE.
  Storage.resize(SizeE, '\01');

  // Checking every size would be too expensive, so step one word at a time
  // and test each step's size minus one, followed by the size itself. Every
  // node created so far is passed as a reference to the next one, which
  // checks that references are recorded correctly.
  SmallVector<ObjectRef, 4> CreatedNodes;
  for (size_t Size = SizeB; Size < SizeE; Size += WordSize) {
    for (size_t Trim : {size_t(1), size_t(0)}) {
      StringRef Data(Storage.data(), Size - Trim);
      std::optional<ObjectProxy> Node;
      ASSERT_THAT_ERROR(Store->createProxy(CreatedNodes, Data).moveInto(Node),
                        Succeeded());
      ASSERT_EQ(Data, Node->getData());
      // The byte just past the returned data is expected to be a null
      // terminator.
      ASSERT_EQ(0, Node->getData().end()[0]);
      ASSERT_EQ(Node->getNumReferences(), CreatedNodes.size());
      CreatedNodes.emplace_back(Node->getRef());
    }
  }

  for (auto Ref : CreatedNodes)
    ASSERT_THAT_ERROR(Store->validate(Store->getID(Ref)), Succeeded());
}

/// Common test functionality for creating blobs in parallel. You can vary which
/// cas instances are the same or different, and the size of the created blobs.
///
/// For each of 100 random blobs of \p BlobSize bytes, two producer tasks store
/// the blob (through \p Write1 and \p Write2) and two consumer tasks wait for
/// its ID to be published and then read it back (through \p Read1 and
/// \p Read2), checking that the data matches.
static void testBlobsParallel(ObjectStore &Read1, ObjectStore &Read2,
                              ObjectStore &Write1, ObjectStore &Write2,
                              uint64_t BlobSize) {
  // Pass the name as a string: streaming the function designator itself would
  // not produce a readable trace message.
  SCOPED_TRACE("testBlobsParallel");
  unsigned BlobCount = 100;
  std::vector<std::string> Blobs;
  Blobs.reserve(BlobCount);
  for (unsigned I = 0; I < BlobCount; ++I) {
    // Fill the blob with random bytes, then trim the overshoot.
    std::string Blob;
    Blob.reserve(BlobSize);
    while (Blob.size() < BlobSize) {
      auto R = sys::Process::GetRandomNumber();
      Blob.append(reinterpret_cast<const char *>(&R), sizeof(R));
    }
    assert(Blob.size() >= BlobSize);
    Blob.resize(BlobSize);
    Blobs.push_back(std::move(Blob));
  }

  // CreatedNodes[I] stays disengaged until a producer publishes the ID of blob
  // I (or the tombstone key if storing it failed). Guarded by NodesMtx.
  std::mutex NodesMtx;
  std::vector<std::optional<CASID>> CreatedNodes(BlobCount);

  auto Producer = [&](unsigned I, ObjectStore *CAS) {
    std::optional<ObjectProxy> Node;
    EXPECT_THAT_ERROR(CAS->createProxy({}, Blobs[I]).moveInto(Node),
                      Succeeded());
    {
      std::lock_guard<std::mutex> L(NodesMtx);
      CreatedNodes[I] = Node ? Node->getID() : CASID::getDenseMapTombstoneKey();
    }
  };

  auto Consumer = [&](unsigned I, ObjectStore *CAS) {
    std::optional<CASID> ID;
    while (!ID) {
      // Busy wait.
      std::lock_guard<std::mutex> L(NodesMtx);
      ID = CreatedNodes[I];
    }
    if (ID == CASID::getDenseMapTombstoneKey())
      // Producer failed; already reported.
      return;

    std::optional<ObjectProxy> Node;
    ASSERT_THAT_ERROR(CAS->getProxy(*ID).moveInto(Node), Succeeded());
    EXPECT_EQ(Node->getData(), Blobs[I]);
  };

  DefaultThreadPool Threads;
  for (unsigned I = 0; I < BlobCount; ++I) {
    // Enqueue the producers for a blob before its consumers. Consumers
    // busy-wait until a producer has published the ID, so if they were queued
    // first they could occupy every worker of a small pool (one or two
    // threads) and starve the producers forever.
    // NOTE(review): relies on the pool starting tasks in submission order --
    // confirm for the configured thread pool.
    Threads.async(Producer, I, &Write1);
    Threads.async(Producer, I, &Write2);
    Threads.async(Consumer, I, &Read1);
    Threads.async(Consumer, I, &Read2);
  }

  Threads.wait();
}

/// Runs testBlobsParallel with a single CAS instance used for all readers and
/// writers.
static void testBlobsParallel1(ObjectStore &CAS, uint64_t BlobSize) {
  // Pass the name as a string: streaming the function designator itself would
  // not produce a readable trace message.
  SCOPED_TRACE("testBlobsParallel1");
  testBlobsParallel(CAS, CAS, CAS, CAS, BlobSize);
}

// Parallel producers and consumers on one CAS, using 1KB blobs.
TEST_P(CASTest, BlobsParallel) {
  std::shared_ptr<ObjectStore> Store = createObjectStore();
  const uint64_t BlobSize = 1ULL * 1024;
  ASSERT_NO_FATAL_FAILURE(testBlobsParallel1(*Store, BlobSize));
}

#ifdef EXPENSIVE_CHECKS
// Parallel producers and consumers on one CAS with bigger blobs; only built
// when EXPENSIVE_CHECKS is defined.
TEST_P(CASTest, BlobsBigParallel) {
  std::shared_ptr<ObjectStore> Store = createObjectStore();
  // 100k is large enough to be standalone files in our on-disk cas.
  const uint64_t BlobSize = 100ULL * 1024;
  ASSERT_NO_FATAL_FAILURE(testBlobsParallel1(*Store, BlobSize));
}
#endif
69 changes: 69 additions & 0 deletions llvm/unittests/CAS/OnDiskCommonUtils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
//===- llvm/unittest/CAS/OnDiskCommonUtils.h --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/CAS/BuiltinObjectHasher.h"
#include "llvm/CAS/OnDiskGraphDB.h"
#include "llvm/Support/BLAKE3.h"

namespace llvm::unittest::cas {

using namespace llvm::cas;
using namespace llvm::cas::ondisk;

/// Hasher used by the on-disk CAS unit-test helpers.
using HasherT = BLAKE3;
/// Digest type returned by HasherT::hash().
using HashType = decltype(HasherT::hash(std::declval<ArrayRef<uint8_t> &>()));
/// Fixed-size (20 byte) value payload built by valueFromString().
using ValueType = std::array<char, 20>;

/// Hash an object consisting of \p Data and the digests of its references
/// (\p RefHashes) with BuiltinObjectHasher.
inline HashType digest(StringRef Data, ArrayRef<ArrayRef<uint8_t>> RefHashes) {
  return BuiltinObjectHasher<HasherT>::hashObject(
      RefHashes, arrayRefFromStringRef<char>(Data));
}

/// Compute the ObjectID that \p DB uses for an object with content \p Data
/// and references \p Refs. The object itself is not stored.
inline ObjectID digest(OnDiskGraphDB &DB, StringRef Data,
                       ArrayRef<ObjectID> Refs) {
  SmallVector<ArrayRef<uint8_t>, 8> RefHashes;
  for (ObjectID Ref : Refs)
    RefHashes.push_back(DB.getDigest(Ref));
  HashType Digest = digest(Data, RefHashes);
  return DB.getReference(Digest);
}

/// Hash the raw bytes of \p Data directly with HasherT.
inline HashType digest(StringRef Data) {
  return HasherT::hash(arrayRefFromStringRef(Data));
}

/// Build a ValueType from the first sizeof(ValueType) bytes of \p S. When
/// \p S is shorter than that, the remaining bytes are zero.
inline ValueType valueFromString(StringRef S) {
  // Value-initialize so the tail is well defined for short inputs; a
  // default-initialized std::array<char, N> would leave it indeterminate.
  ValueType Val{};
  llvm::copy(S.substr(0, sizeof(Val)), Val.data());
  return Val;
}

/// Store an object with content \p Data and references \p Refs in \p DB.
/// \returns the object's ID, or the error reported by OnDiskGraphDB::store.
inline Expected<ObjectID> store(OnDiskGraphDB &DB, StringRef Data,
                                ArrayRef<ObjectID> Refs) {
  ObjectID ID = digest(DB, Data, Refs);
  if (Error E = DB.store(ID, Refs, arrayRefFromStringRef<char>(Data)))
    return std::move(E);
  return ID;
}

/// Print the data of object \p ID and, recursively, of everything it
/// references to \p OS, one object per line, indenting each level by two
/// more columns starting at \p Indent. An object that load() reports as
/// absent is skipped without error.
inline Error printTree(OnDiskGraphDB &DB, ObjectID ID, raw_ostream &OS,
                       unsigned Indent = 0) {
  std::optional<ondisk::ObjectHandle> Obj;
  if (Error E = DB.load(ID).moveInto(Obj))
    return E;
  if (!Obj)
    return Error::success();
  OS.indent(Indent) << toStringRef(DB.getObjectData(*Obj)) << '\n';
  for (ObjectID Ref : DB.getObjectRefs(*Obj)) {
    if (Error E = printTree(DB, Ref, OS, Indent + 2))
      return E;
  }
  return Error::success();
}

} // namespace llvm::unittest::cas
284 changes: 284 additions & 0 deletions llvm/unittests/CAS/OnDiskGraphDBTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,284 @@
//===- llvm/unittest/CAS/OnDiskGraphDBTest.cpp ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "OnDiskCommonUtils.h"
#include "llvm/Testing/Support/Error.h"
#include "llvm/Testing/Support/SupportHelpers.h"
#include "gtest/gtest.h"

#if LLVM_ENABLE_ONDISK_CAS

using namespace llvm;
using namespace llvm::cas;
using namespace llvm::cas::ondisk;
using namespace llvm::unittest::cas;

// Basic store/load round trips on a single database, plus storage-size
// accounting across a close and re-open.
TEST(OnDiskGraphDBTest, Basic) {
  unittest::TempDir Temp("ondiskcas", /*Unique=*/true);
  std::unique_ptr<OnDiskGraphDB> DB;
  ASSERT_THAT_ERROR(
      OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType)).moveInto(DB),
      Succeeded());

  // Thin wrappers that bind the shared helpers to this test's database.
  auto DigestOf = [&DB](StringRef Data, ArrayRef<ObjectID> Refs) -> ObjectID {
    return ::digest(*DB, Data, Refs);
  };
  auto StoreObject = [&](StringRef Data,
                         ArrayRef<ObjectID> Refs) -> Expected<ObjectID> {
    return ::store(*DB, Data, Refs);
  };

  // Store an object and read it back.
  std::optional<ObjectID> HelloID;
  ASSERT_THAT_ERROR(StoreObject("hello", {}).moveInto(HelloID), Succeeded());

  std::optional<ondisk::ObjectHandle> HelloObj;
  ASSERT_THAT_ERROR(DB->load(*HelloID).moveInto(HelloObj), Succeeded());
  ASSERT_TRUE(HelloObj.has_value());
  EXPECT_EQ(toStringRef(DB->getObjectData(*HelloObj)), "hello");

  // Digest -> reference maps back to the same ID.
  ArrayRef<uint8_t> HelloDigest = DB->getDigest(*HelloID);
  ObjectID HelloAgain = DB->getReference(HelloDigest);
  EXPECT_EQ(HelloID, HelloAgain);

  // An ID that was only computed, not stored, loads as "no object".
  ObjectID WorldID = DigestOf("world", {});
  EXPECT_FALSE(DB->containsObject(WorldID));
  std::optional<ondisk::ObjectHandle> WorldObj;
  ASSERT_THAT_ERROR(DB->load(WorldID).moveInto(WorldObj), Succeeded());
  EXPECT_FALSE(WorldObj.has_value());

  // After storing it, the object is present and readable.
  ASSERT_THAT_ERROR(
      DB->store(WorldID, {}, arrayRefFromStringRef<char>("world")),
      Succeeded());
  EXPECT_TRUE(DB->containsObject(WorldID));
  ASSERT_THAT_ERROR(DB->load(WorldID).moveInto(WorldObj), Succeeded());
  ASSERT_TRUE(WorldObj.has_value());
  EXPECT_EQ(toStringRef(DB->getObjectData(*WorldObj)), "world");

  size_t LargeDataSize = 256LL * 1024LL; // 256K.
  // The precise size number is not important, we mainly check that the large
  // object will be properly accounted for.
  EXPECT_TRUE(DB->getStorageSize() > 10 &&
              DB->getStorageSize() < LargeDataSize);

  SmallString<16> LargeBuffer;
  LargeBuffer.resize(LargeDataSize);
  ASSERT_THAT_ERROR(StoreObject(LargeBuffer, {}).moveInto(HelloID),
                    Succeeded());
  size_t SizeBeforeClose = DB->getStorageSize();
  EXPECT_TRUE(SizeBeforeClose > LargeDataSize);

  // Close & re-open the DB and check that it reports the same storage size.
  DB.reset();
  ASSERT_THAT_ERROR(
      OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType)).moveInto(DB),
      Succeeded());
  EXPECT_EQ(DB->getStorageSize(), SizeBeforeClose);
}

// With the SingleNode fault-in policy, objects that exist only in the upstream
// database are visible through the primary one, and only the nodes that were
// actually loaded end up in the primary database.
TEST(OnDiskGraphDBTest, FaultInSingleNode) {
  // Populate the upstream database with "hello", "another", and "world"
  // (which references the other two).
  unittest::TempDir TempUpstream("ondiskcas-upstream", /*Unique=*/true);
  std::unique_ptr<OnDiskGraphDB> UpstreamDB;
  ASSERT_THAT_ERROR(
      OnDiskGraphDB::open(TempUpstream.path(), "blake3", sizeof(HashType))
          .moveInto(UpstreamDB),
      Succeeded());
  {
    std::optional<ObjectID> HelloID;
    ASSERT_THAT_ERROR(store(*UpstreamDB, "hello", {}).moveInto(HelloID),
                      Succeeded());
    std::optional<ObjectID> AnotherID;
    ASSERT_THAT_ERROR(store(*UpstreamDB, "another", {}).moveInto(AnotherID),
                      Succeeded());
    std::optional<ObjectID> WorldID;
    ASSERT_THAT_ERROR(
        store(*UpstreamDB, "world", {*HelloID, *AnotherID}).moveInto(WorldID),
        Succeeded());
  }

  // Open the primary database chained to the upstream one.
  unittest::TempDir Temp("ondiskcas", /*Unique=*/true);
  std::unique_ptr<OnDiskGraphDB> DB;
  ASSERT_THAT_ERROR(
      OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType),
                          std::move(UpstreamDB),
                          OnDiskGraphDB::FaultInPolicy::SingleNode)
          .moveInto(DB),
      Succeeded());

  ObjectID ID1 = digest(*DB, "hello", {});
  ObjectID ID2 = digest(*DB, "another", {});
  ObjectID ID3 = digest(*DB, "world", {ID1, ID2});
  // "world" without references was never stored anywhere.
  ObjectID ID4 = digest(*DB, "world", {});

  EXPECT_TRUE(DB->containsObject(ID1));
  EXPECT_TRUE(DB->containsObject(ID2));
  EXPECT_TRUE(DB->containsObject(ID3));
  EXPECT_FALSE(DB->containsObject(ID4));

  EXPECT_TRUE(DB->getExistingReference(digest("hello", {})).has_value());
  EXPECT_TRUE(DB->getExistingReference(DB->getDigest(ID3)).has_value());
  EXPECT_FALSE(DB->getExistingReference(digest("world", {})).has_value());

  {
    // Leaf object: data only, no references.
    std::optional<ondisk::ObjectHandle> Loaded;
    ASSERT_THAT_ERROR(DB->load(ID1).moveInto(Loaded), Succeeded());
    ASSERT_TRUE(Loaded.has_value());
    EXPECT_EQ(toStringRef(DB->getObjectData(*Loaded)), "hello");
    auto LoadedRefs = DB->getObjectRefs(*Loaded);
    EXPECT_TRUE(LoadedRefs.empty());
  }
  {
    // Object with two references, in the order they were stored.
    std::optional<ondisk::ObjectHandle> Loaded;
    ASSERT_THAT_ERROR(DB->load(ID3).moveInto(Loaded), Succeeded());
    ASSERT_TRUE(Loaded.has_value());
    EXPECT_EQ(toStringRef(DB->getObjectData(*Loaded)), "world");
    auto LoadedRefs = DB->getObjectRefs(*Loaded);
    ASSERT_EQ(std::distance(LoadedRefs.begin(), LoadedRefs.end()), 2);
    EXPECT_EQ(LoadedRefs.begin()[0], ID1);
    EXPECT_EQ(LoadedRefs.begin()[1], ID2);
  }
  {
    // Unknown object: load succeeds but yields nothing.
    std::optional<ondisk::ObjectHandle> Loaded;
    ASSERT_THAT_ERROR(DB->load(ID4).moveInto(Loaded), Succeeded());
    EXPECT_FALSE(Loaded.has_value());
  }

  // Re-open the primary without chaining, to verify the data were copied from
  // the upstream.
  ASSERT_THAT_ERROR(
      OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType),
                          /*UpstreamDB=*/nullptr,
                          OnDiskGraphDB::FaultInPolicy::SingleNode)
          .moveInto(DB),
      Succeeded());
  ID1 = digest(*DB, "hello", {});
  ID2 = digest(*DB, "another", {});
  ID3 = digest(*DB, "world", {ID1, ID2});
  // Only the objects that were loaded above ("hello" and "world") are
  // present; "another" was never loaded, so it is not.
  EXPECT_TRUE(DB->containsObject(ID1));
  EXPECT_FALSE(DB->containsObject(ID2));
  EXPECT_TRUE(DB->containsObject(ID3));
  {
    std::optional<ondisk::ObjectHandle> Loaded;
    ASSERT_THAT_ERROR(DB->load(ID1).moveInto(Loaded), Succeeded());
    ASSERT_TRUE(Loaded.has_value());
    EXPECT_EQ(toStringRef(DB->getObjectData(*Loaded)), "hello");
    auto LoadedRefs = DB->getObjectRefs(*Loaded);
    EXPECT_TRUE(LoadedRefs.empty());
  }
}

// Checks the FullTree fault-in policy: after the root has been loaded through
// a primary database chained to an upstream, the whole tree must be readable
// from the primary on its own.
TEST(OnDiskGraphDBTest, FaultInFullTree) {
  // Populate a standalone upstream database with the tree
  //   root -> { 1 -> { 11, 12 -> { 121 } }, 2 -> { 12, 21, 22 } }
  // and remember the digest of its root node.
  unittest::TempDir UpstreamDir("ondiskcas-upstream", /*Unique=*/true);
  std::unique_ptr<OnDiskGraphDB> Upstream;
  ASSERT_THAT_ERROR(
      OnDiskGraphDB::open(UpstreamDir.path(), "blake3", sizeof(HashType))
          .moveInto(Upstream),
      Succeeded());

  HashType RootHash;
  {
    std::optional<ObjectID> Leaf11;
    ASSERT_THAT_ERROR(store(*Upstream, "11", {}).moveInto(Leaf11),
                      Succeeded());
    std::optional<ObjectID> Leaf121;
    ASSERT_THAT_ERROR(store(*Upstream, "121", {}).moveInto(Leaf121),
                      Succeeded());
    std::optional<ObjectID> Node12;
    ASSERT_THAT_ERROR(store(*Upstream, "12", {*Leaf121}).moveInto(Node12),
                      Succeeded());
    std::optional<ObjectID> Node1;
    ASSERT_THAT_ERROR(
        store(*Upstream, "1", {*Leaf11, *Node12}).moveInto(Node1),
        Succeeded());
    std::optional<ObjectID> Leaf21;
    ASSERT_THAT_ERROR(store(*Upstream, "21", {}).moveInto(Leaf21),
                      Succeeded());
    std::optional<ObjectID> Leaf22;
    ASSERT_THAT_ERROR(store(*Upstream, "22", {}).moveInto(Leaf22),
                      Succeeded());
    std::optional<ObjectID> Node2;
    ASSERT_THAT_ERROR(
        store(*Upstream, "2", {*Node12, *Leaf21, *Leaf22}).moveInto(Node2),
        Succeeded());
    std::optional<ObjectID> Root;
    ASSERT_THAT_ERROR(store(*Upstream, "root", {*Node1, *Node2}).moveInto(Root),
                      Succeeded());

    ArrayRef<uint8_t> RootDigest = Upstream->getDigest(*Root);
    ASSERT_EQ(RootDigest.size(), RootHash.size());
    llvm::copy(RootDigest, RootHash.data());
  }

  // Open an empty primary chained to the upstream, using the FullTree policy.
  unittest::TempDir PrimaryDir("ondiskcas", /*Unique=*/true);
  std::unique_ptr<OnDiskGraphDB> Primary;
  ASSERT_THAT_ERROR(
      OnDiskGraphDB::open(PrimaryDir.path(), "blake3", sizeof(HashType),
                          std::move(Upstream),
                          OnDiskGraphDB::FaultInPolicy::FullTree)
          .moveInto(Primary),
      Succeeded());

  // Load only the root node through the chained primary.
  {
    ObjectID RootID = Primary->getReference(RootHash);
    std::optional<ondisk::ObjectHandle> RootObj;
    ASSERT_THAT_ERROR(Primary->load(RootID).moveInto(RootObj), Succeeded());
    ASSERT_TRUE(RootObj.has_value());
    EXPECT_EQ(toStringRef(Primary->getObjectData(*RootObj)), "root");
    auto RootRefs = Primary->getObjectRefs(*RootObj);
    ASSERT_EQ(std::distance(RootRefs.begin(), RootRefs.end()), 2);
  }

  // Re-open the primary without chaining, to verify the data were copied from
  // the upstream.
  ASSERT_THAT_ERROR(
      OnDiskGraphDB::open(PrimaryDir.path(), "blake3", sizeof(HashType),
                          /*UpstreamDB=*/nullptr,
                          OnDiskGraphDB::FaultInPolicy::FullTree)
          .moveInto(Primary),
      Succeeded());

  // Every node of the tree must now be printable from the primary alone.
  std::string PrintedTree;
  raw_string_ostream TreeOS(PrintedTree);
  ObjectID RootID = Primary->getReference(RootHash);
  ASSERT_THAT_ERROR(printTree(*Primary, RootID, TreeOS), Succeeded());
  StringRef ExpectedTree = R"(root
1
11
12
121
2
12
121
21
22
)";
  EXPECT_EQ(PrintedTree, ExpectedTree);
}

// Checks that a database directory created under one fault-in policy cannot
// be opened again under the other policy.
TEST(OnDiskGraphDBTest, FaultInPolicyConflict) {
  using Policy = OnDiskGraphDB::FaultInPolicy;

  // Creates a primary database with \p Initial, closes it, and expects that
  // opening the same directory with \p Conflicting fails.
  auto expectReopenFails = [](Policy Initial, Policy Conflicting) {
    unittest::TempDir UpstreamDir("ondiskcas-upstream", /*Unique=*/true);
    std::unique_ptr<OnDiskGraphDB> Upstream;
    ASSERT_THAT_ERROR(
        OnDiskGraphDB::open(UpstreamDir.path(), "blake3", sizeof(HashType))
            .moveInto(Upstream),
        Succeeded());

    unittest::TempDir PrimaryDir("ondiskcas", /*Unique=*/true);
    std::unique_ptr<OnDiskGraphDB> Primary;
    ASSERT_THAT_ERROR(
        OnDiskGraphDB::open(PrimaryDir.path(), "blake3", sizeof(HashType),
                            std::move(Upstream), Initial)
            .moveInto(Primary),
        Succeeded());

    // Close the primary before trying to open it with the other policy.
    Primary.reset();

    // NOTE(review): Upstream was already passed through std::move above, so
    // this second open presumably receives a null upstream -- confirm that is
    // intended, or pass /*UpstreamDB=*/nullptr explicitly.
    ASSERT_THAT_ERROR(
        OnDiskGraphDB::open(PrimaryDir.path(), "blake3", sizeof(HashType),
                            std::move(Upstream), Conflicting)
            .moveInto(Primary),
        Failed());
  };

  // Open as 'single', then as 'full'.
  expectReopenFails(Policy::SingleNode, Policy::FullTree);
  // Open as 'full', then as 'single'.
  expectReopenFails(Policy::FullTree, Policy::SingleNode);
}

#endif // LLVM_ENABLE_ONDISK_CAS
146 changes: 146 additions & 0 deletions llvm/unittests/CAS/OnDiskHashMappedTrieTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
//===- OnDiskHashMappedTrieTest.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/CAS/OnDiskHashMappedTrie.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Testing/Support/Error.h"
#include "llvm/Testing/Support/SupportHelpers.h"
#include "gtest/gtest.h"

#if LLVM_ENABLE_ONDISK_CAS
using namespace llvm;
using namespace llvm::cas;

namespace {

// Exercises insertion, lookup, in-place data update, and offset-based recovery
// on an on-disk hash-mapped trie, repeated for several hash sizes.
TEST(OnDiskHashMappedTrieTest, Insertion) {
unittest::TempDir Temp("on-disk-hash-mapped-trie", /*Unique=*/true);

// Create tries with various sizes of hash and with data.
//
// NOTE: The check related to \a recoverFromFileOffset() catches a potential
// off-by-one bounds-checking bug when the trie record size (data + hash) adds
// up to a multiple of 8B. Iterate through a few different hash sizes to
// check it both ways.
constexpr size_t MB = 1024u * 1024u;
constexpr size_t DataSize = 8; // Multiple of 8B.
for (size_t NumHashBytes : {1, 2, 4, 8}) {
size_t NumHashBits = NumHashBytes * 8;

// Opens the trie for the current hash size. Every call uses the same path and
// table name, so repeated calls yield handles on the same on-disk trie.
auto createTrie = [&]() {
return OnDiskHashMappedTrie::create(
Temp.path((Twine(NumHashBytes) + "B").str()), "index",
/*NumHashBits=*/NumHashBits, DataSize, /*MaxFileSize=*/MB,
/*NewInitialFileSize=*/std::nullopt);
};

// Trie1 performs the insertions; Trie2 is a second handle used to check that
// they are visible through another instance.
std::optional<OnDiskHashMappedTrie> Trie1;
ASSERT_THAT_ERROR(createTrie().moveInto(Trie1), Succeeded());
std::optional<OnDiskHashMappedTrie> Trie2;
ASSERT_THAT_ERROR(createTrie().moveInto(Trie2), Succeeded());

// Only the first NumHashBytes bytes of each buffer are used as the key.
uint8_t Hash0Bytes[8] = {0, 0, 0, 0, 0, 0, 0, 0};
uint8_t Hash1Bytes[8] = {1, 0, 0, 0, 0, 0, 0, 0};
auto Hash0 = ArrayRef(Hash0Bytes).take_front(NumHashBytes);
auto Hash1 = ArrayRef(Hash1Bytes).take_front(NumHashBytes);
// Payloads; each one is exactly DataSize bytes long (checked below).
constexpr StringLiteral Data0v1Bytes = "data0.v1";
constexpr StringLiteral Data0v2Bytes = "data0.v2";
constexpr StringLiteral Data1Bytes = "data1...";
static_assert(Data0v1Bytes.size() == DataSize, "math error");
static_assert(Data0v2Bytes.size() == DataSize, "math error");
static_assert(Data1Bytes.size() == DataSize, "math error");
ArrayRef<char> Data0v1 = ArrayRef(Data0v1Bytes.data(), Data0v1Bytes.size());
ArrayRef<char> Data0v2 = ArrayRef(Data0v2Bytes.data(), Data0v2Bytes.size());
ArrayRef<char> Data1 = ArrayRef(Data1Bytes.data(), Data1Bytes.size());

// Lookup when trie is empty.
EXPECT_FALSE(Trie1->find(Hash0));

// Insert.
// Remember the offset of the new record so that later lookups can be compared
// against it, and keep a mutable view of its data for the in-place update
// further down.
std::optional<FileOffset> Offset;
std::optional<MutableArrayRef<char>> Data;
{
auto Insertion = Trie1->insert({Hash0, Data0v1});
ASSERT_TRUE(Insertion);
EXPECT_EQ(Hash0, Insertion->Hash);
EXPECT_EQ(Data0v1, Insertion->Data);
EXPECT_TRUE(isAddrAligned(Align(8), Insertion->Data.data()));

Offset = Insertion.getOffset();
Data = Insertion->Data;
}

// Find.
{
auto Lookup = Trie1->find(Hash0);
ASSERT_TRUE(Lookup);
EXPECT_EQ(Hash0, Lookup->Hash);
EXPECT_EQ(Data0v1, Lookup->Data);
EXPECT_EQ(Offset->get(), Lookup.getOffset().get());
}

// Find in a different instance of the same on-disk trie that existed
// before the insertion.
{
auto Lookup = Trie2->find(Hash0);
ASSERT_TRUE(Lookup);
EXPECT_EQ(Hash0, Lookup->Hash);
EXPECT_EQ(Data0v1, Lookup->Data);
EXPECT_EQ(Offset->get(), Lookup.getOffset().get());
}

// Create a new instance and check that too.
Trie2.reset();
ASSERT_THAT_ERROR(createTrie().moveInto(Trie2), Succeeded());
{
auto Lookup = Trie2->find(Hash0);
ASSERT_TRUE(Lookup);
EXPECT_EQ(Hash0, Lookup->Hash);
EXPECT_EQ(Data0v1, Lookup->Data);
EXPECT_EQ(Offset->get(), Lookup.getOffset().get());
}

// Change the data.
// The write goes through the view obtained from Trie1's insertion; the lookup
// below goes through Trie2 and must observe the new bytes at the same offset.
llvm::copy(Data0v2, Data->data());
{
auto Lookup = Trie2->find(Hash0);
ASSERT_TRUE(Lookup);
EXPECT_EQ(Hash0, Lookup->Hash);
EXPECT_EQ(Data0v2, Lookup->Data);
EXPECT_EQ(Offset->get(), Lookup.getOffset().get());
}

// Find different hash.
EXPECT_FALSE(Trie1->find(Hash1));
EXPECT_FALSE(Trie2->find(Hash1));

// Recover from an offset.
{
auto Recovered = Trie1->recoverFromFileOffset(*Offset);
ASSERT_TRUE(Recovered);
EXPECT_EQ(Offset->get(), Recovered.getOffset().get());
EXPECT_EQ(Hash0, Recovered->Hash);
EXPECT_EQ(Data0v2, Recovered->Data);
}

// Insert another thing.
// The second record must land at a different offset than the first one.
{
auto Insertion = Trie1->insert({Hash1, Data1});
ASSERT_TRUE(Insertion);
EXPECT_EQ(Hash1, Insertion->Hash);
EXPECT_EQ(Data1, Insertion->Data);
EXPECT_TRUE(isAddrAligned(Align(8), Insertion->Data.data()));
EXPECT_NE(Offset->get(), Insertion.getOffset().get());
}
}
}

} // namespace

#endif
54 changes: 54 additions & 0 deletions llvm/unittests/CAS/OnDiskKeyValueDBTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
//===- llvm/unittest/CAS/OnDiskKeyValueDBTest.cpp -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/CAS/OnDiskKeyValueDB.h"
#include "OnDiskCommonUtils.h"
#include "llvm/Testing/Support/Error.h"
#include "llvm/Testing/Support/SupportHelpers.h"
#include "gtest/gtest.h"

#if LLVM_ENABLE_ONDISK_CAS

using namespace llvm;
using namespace llvm::cas;
using namespace llvm::cas::ondisk;
using namespace llvm::unittest::cas;

// Exercises the basic put/get behavior of OnDiskKeyValueDB: a key that was
// never stored yields no value, the first put stores its value, and a second
// put for the same key returns the value that is already stored.
TEST(OnDiskKeyValueDBTest, Basic) {
  unittest::TempDir Temp("ondiskkv", /*Unique=*/true);
  std::unique_ptr<OnDiskKeyValueDB> DB;
  ASSERT_THAT_ERROR(OnDiskKeyValueDB::open(Temp.path(), "blake3",
                                           sizeof(HashType), "test",
                                           sizeof(ValueType))
                        .moveInto(DB),
                    Succeeded());

  // Nothing has been stored yet, so the lookup succeeds but finds no value.
  {
    std::optional<ArrayRef<char>> Missing;
    ASSERT_THAT_ERROR(DB->get(digest("hello")).moveInto(Missing), Succeeded());
    EXPECT_FALSE(Missing.has_value());
  }

  // The first put stores "world"; the second put with a different value for
  // the same key is expected to hand back the original "world".
  ValueType ValW = valueFromString("world");
  ArrayRef<char> Val;
  ASSERT_THAT_ERROR(DB->put(digest("hello"), ValW).moveInto(Val), Succeeded());
  EXPECT_EQ(Val, ArrayRef(ValW));
  ASSERT_THAT_ERROR(
      DB->put(digest("hello"), valueFromString("other")).moveInto(Val),
      Succeeded());
  EXPECT_EQ(Val, ArrayRef(ValW));

  // The stored value is now visible through get().
  {
    std::optional<ArrayRef<char>> Stored;
    ASSERT_THAT_ERROR(DB->get(digest("hello")).moveInto(Stored), Succeeded());
    // This has to be a fatal assertion: the dereference below would be
    // undefined behavior on an empty optional.
    ASSERT_TRUE(Stored.has_value());
    EXPECT_EQ(*Stored, ArrayRef(ValW));
  }
}

#endif // LLVM_ENABLE_ONDISK_CAS
151 changes: 151 additions & 0 deletions llvm/unittests/CAS/ProgramTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
//===- ProgramTest.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/Program.h"
#include "llvm/CAS/MappedFileRegionBumpPtr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/ThreadPool.h"
#include "gtest/gtest.h"
#if defined(__APPLE__)
#include <crt_externs.h>
#elif !defined(_MSC_VER)
// Forward declare environ in case it's not provided by stdlib.h.
extern char **environ;
#endif

using namespace llvm;
using namespace llvm::cas;

extern const char *TestMainArgv0;
static char ProgramID = 0;

/// Test fixture that snapshots the environment of the current process so that
/// a test can launch a child process with the same environment plus extra
/// variables added through addEnvVar().
///
/// Entries whose text starts with "GTEST_" are left out of the snapshot.
class CASProgramTest : public testing::Test {
  /// Environment entries handed out by getEnviron().
  std::vector<StringRef> EnvTable;
  /// Owns the UTF-8 strings that EnvTable refers to on Windows; unused on
  /// other platforms.
  std::vector<std::string> EnvStorage;

protected:
  /// Captures the process environment into EnvTable.
  void SetUp() override {
    auto EnvP = [] {
#if defined(_WIN32)
      _wgetenv(L"TMP"); // Populate _wenviron, initially is null
      return _wenviron;
#elif defined(__APPLE__)
      return *_NSGetEnviron();
#else
      return environ;
#endif
    }();
    ASSERT_TRUE(EnvP);

#if defined(_WIN32)
    // EnvTable keeps StringRefs that point into the strings owned by
    // EnvStorage. Reserve room for every variable up front so that EnvStorage
    // never reallocates while the table is being built: a reallocation moves
    // the strings and can leave the earlier StringRefs dangling.
    size_t NumVars = 0;
    for (auto Cursor = EnvP; *Cursor != nullptr; ++Cursor)
      ++NumVars;
    EnvStorage.reserve(EnvStorage.size() + NumVars);
#endif

    // Turns one raw environment entry into a StringRef that stays valid for
    // the lifetime of the fixture.
    auto prepareEnvVar = [this](decltype(*EnvP) Var) -> StringRef {
#if defined(_WIN32)
      // On Windows convert UTF16 encoded variable to UTF8
      auto Len = wcslen(Var);
      ArrayRef<char> Ref{reinterpret_cast<char const *>(Var),
                         Len * sizeof(*Var)};
      EnvStorage.emplace_back();
      auto convStatus = convertUTF16ToUTF8String(Ref, EnvStorage.back());
      EXPECT_TRUE(convStatus);
      return EnvStorage.back();
#else
      (void)this;
      return StringRef(Var);
#endif
    };

    for (; *EnvP != nullptr; ++EnvP) {
      StringRef Var = prepareEnvVar(*EnvP);
      if (!Var.starts_with("GTEST_"))
        EnvTable.emplace_back(Var);
    }
  }

  /// Drops the captured environment.
  void TearDown() override {
    EnvTable.clear();
    EnvStorage.clear();
  }

  /// Appends \p Var to the captured environment. Only a reference is stored,
  /// so the caller must keep the underlying string alive.
  void addEnvVar(StringRef Var) { EnvTable.emplace_back(Var); }

  /// Returns the captured environment, including entries added through
  /// addEnvVar().
  ArrayRef<StringRef> getEnviron() const { return EnvTable; }
};

#if LLVM_ENABLE_ONDISK_CAS

// Runs the same multi-threaded allocation workload against one backing file
// from two processes: this one and a child that re-executes the test binary
// with LLVM_CAS_TEST_MAPPED_FILE_REGION pointing at the file.
TEST_F(CASProgramTest, MappedFileRegionBumpPtrTest) {
  // Opens (or creates) the allocator backed by \p Path, performs 100
  // allocations from a thread pool, and checks that every allocation still
  // holds the value written to it.
  auto TestAllocator = [](StringRef Path) {
    auto NewFileConstructor = [&](MappedFileRegionBumpPtr &Alloc) -> Error {
      Alloc.initializeBumpPtr(0);
      return Error::success();
    };

    auto Alloc = MappedFileRegionBumpPtr::create(
        Path, /*Capacity=*/10 * 1024 * 1024,
        /*BumpPtrOffset=*/0, NewFileConstructor);
    // Consume the error when creation fails: an Expected that is destroyed
    // while still holding an unhandled error aborts the process, which would
    // hide the actual failure message.
    if (!Alloc) {
      FAIL() << toString(Alloc.takeError());
    }

    std::vector<unsigned *> AllocatedPtr;
    AllocatedPtr.resize(100);
    DefaultThreadPool Threads;
    for (unsigned I = 0; I < 100; ++I) {
      Threads.async(
          [&](unsigned Idx) {
            // Allocate a buffer that is larger than needed so allocator hits
            // additional pages for test coverage.
            unsigned *P = reinterpret_cast<unsigned *>(Alloc->allocate(100));
            *P = Idx;
            // Each task writes to its own slot, so no synchronization is
            // needed here.
            AllocatedPtr[Idx] = P;
          },
          I);
    }

    Threads.wait();
    for (unsigned I = 0; I < 100; ++I)
      EXPECT_EQ(*AllocatedPtr[I], I);
  };

  // Child process: run the workload on the file named by the parent and exit.
  if (const char *File = getenv("LLVM_CAS_TEST_MAPPED_FILE_REGION")) {
    TestAllocator(File);
    exit(0);
  }

  // Parent process: pick a fresh location for the backing file.
  SmallString<128> FilePath;
  std::error_code DirEC =
      sys::fs::createUniqueDirectory("MappedFileRegionBumpPtr", FilePath);
  ASSERT_FALSE(DirEC) << DirEC.message();
  sys::path::append(FilePath, "allocation-file");

  std::string Executable =
      sys::fs::getMainExecutable(TestMainArgv0, &ProgramID);
  StringRef Argv[] = {
      Executable, "--gtest_filter=CASProgramTest.MappedFileRegionBumpPtrTest"};

  // Add LLVM_CAS_TEST_MAPPED_FILE_REGION to the environment of the child.
  std::string EnvVar = "LLVM_CAS_TEST_MAPPED_FILE_REGION=";
  EnvVar += FilePath.str();
  addEnvVar(EnvVar);

  std::string Error;
  bool ExecutionFailed;
  sys::ProcessInfo PI = sys::ExecuteNoWait(Executable, Argv, getEnviron(), {},
                                           0, &Error, &ExecutionFailed);
  // Run the workload in this process right after launching the child, before
  // looking at the launch result, so that the two processes can overlap.
  TestAllocator(FilePath);

  ASSERT_FALSE(ExecutionFailed) << Error;
  ASSERT_TRUE(Error.empty());
  ASSERT_NE(PI.Pid, sys::ProcessInfo::InvalidPid) << "Invalid process id";
  sys::ProcessInfo WaitResult =
      llvm::sys::Wait(PI, /*SecondsToWait=*/5, &Error);
  ASSERT_TRUE(Error.empty());
  // The child calls exit(0) once it is done; any other code means it did not
  // finish normally.
  EXPECT_EQ(0, WaitResult.ReturnCode);

  // Clean up after both processes finish testing.
  sys::fs::remove(FilePath);
  sys::fs::remove_directories(sys::path::parent_path(FilePath));
}

#endif // LLVM_ENABLE_ONDISK_CAS
1 change: 1 addition & 0 deletions llvm/unittests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ add_subdirectory(AsmParser)
add_subdirectory(BinaryFormat)
add_subdirectory(Bitcode)
add_subdirectory(Bitstream)
add_subdirectory(CAS)
add_subdirectory(CGData)
add_subdirectory(CodeGen)
add_subdirectory(DebugInfo)
Expand Down
76 changes: 76 additions & 0 deletions llvm/unittests/Support/ProgramTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ExponentialBackoff.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Signals.h"
Expand Down Expand Up @@ -561,6 +562,81 @@ TEST_F(ProgramEnvTest, TestLockFile) {
sys::fs::remove(LockedFile);
}

// Checks that an exclusive lock request from a second process fails while
// this process holds the lock on the same file.
//
// The parent creates and locks a file, then re-executes the test binary as a
// child with LLVM_PROGRAM_TEST_LOCKED_FILE set. The child tries to take the
// lock exclusively without waiting, expects that to fail, and creates a
// "<file>-finished" marker that the parent polls for.
TEST_F(ProgramEnvTest, TestLockFileExclusive) {
  using namespace llvm::sys;
  using namespace std::chrono_literals;

  if (const char *LockedFile = getenv("LLVM_PROGRAM_TEST_LOCKED_FILE")) {
    // Child process.
    int FD2;
    ASSERT_NO_ERROR(fs::openFileForReadWrite(LockedFile, FD2,
                                             fs::CD_OpenExisting, fs::OF_None));

    // A zero timeout means the attempt does not wait; the parent holds the
    // lock, so an error is the expected outcome.
    std::error_code ErrC =
        fs::tryLockFile(FD2, std::chrono::seconds(0), /*Exclusive=*/true);
    EXPECT_TRUE(ErrC);
    close(FD2);
    // Write a file to indicate just finished.
    std::string FinishFile = std::string(LockedFile) + "-finished";
    int FD3;
    ASSERT_NO_ERROR(fs::openFileForReadWrite(FinishFile, FD3, fs::CD_CreateNew,
                                             fs::OF_None));
    close(FD3);
    exit(0);
  }

  // Create file that will be locked.
  SmallString<64> LockedFile;
  int FD1;
  ASSERT_NO_ERROR(
      fs::createUniqueDirectory("TestLockFileExclusive", LockedFile));
  sys::path::append(LockedFile, "file");
  ASSERT_NO_ERROR(
      fs::openFileForReadWrite(LockedFile, FD1, fs::CD_CreateNew, fs::OF_None));

  std::string Executable =
      sys::fs::getMainExecutable(TestMainArgv0, &ProgramTestStringArg1);
  StringRef argv[] = {Executable,
                      "--gtest_filter=ProgramEnvTest.TestLockFileExclusive"};

  // Add LLVM_PROGRAM_TEST_LOCKED_FILE to the environment of the child.
  std::string EnvVar = "LLVM_PROGRAM_TEST_LOCKED_FILE=";
  EnvVar += LockedFile.str();
  addEnvVar(EnvVar);

  // Lock the file.
  ASSERT_NO_ERROR(fs::tryLockFile(FD1));

  std::string Error;
  bool ExecutionFailed;
  ProcessInfo PI2 = ExecuteNoWait(Executable, argv, getEnviron(), {}, 0, &Error,
                                  &ExecutionFailed);
  ASSERT_FALSE(ExecutionFailed) << Error;
  ASSERT_TRUE(Error.empty());
  ASSERT_NE(PI2.Pid, ProcessInfo::InvalidPid) << "Invalid process id";

  std::string FinishFile = std::string(LockedFile) + "-finished";
  // Wait till child process writes the file to indicate the job finished.
  bool Finished = false;
  ExponentialBackoff Backoff(5s); // timeout 5s.
  do {
    if (fs::exists(FinishFile)) {
      Finished = true;
      break;
    }
  } while (Backoff.waitForNextAttempt());

  ASSERT_TRUE(Finished);
  ASSERT_NO_ERROR(fs::unlockFile(FD1));
  // The descriptor is not needed once the lock has been released; close it
  // instead of leaking it for the rest of the test binary's lifetime.
  close(FD1);
  ProcessInfo WaitResult = llvm::sys::Wait(PI2, /*SecondsToWait=*/1, &Error);
  ASSERT_TRUE(Error.empty());
  ASSERT_EQ(0, WaitResult.ReturnCode);
  ASSERT_EQ(WaitResult.Pid, PI2.Pid);
  // Best-effort cleanup of the files and directory created above.
  sys::fs::remove(LockedFile);
  sys::fs::remove(FinishFile);
  sys::fs::remove_directories(sys::path::parent_path(LockedFile));
}

TEST_F(ProgramEnvTest, TestExecuteWithNoStacktraceHandler) {
using namespace llvm::sys;

Expand Down