Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Disk array packed headers #3557

Merged
merged 2 commits into from
Jun 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.15)

project(Kuzu VERSION 0.4.2.5 LANGUAGES CXX C)
project(Kuzu VERSION 0.4.2.6 LANGUAGES CXX C)

find_package(Threads REQUIRED)

Expand Down
Binary file modified dataset/binary-demo/catalog.kz
Binary file not shown.
Binary file modified dataset/binary-demo/data.kz
Binary file not shown.
Binary file modified dataset/binary-demo/metadata.kz
Binary file not shown.
Binary file modified dataset/binary-demo/n-0.hindex
Binary file not shown.
Binary file modified dataset/binary-demo/n-1.hindex
Binary file not shown.
2 changes: 1 addition & 1 deletion dataset/binary-demo/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.4.2.5
0.4.2.6
Binary file modified dataset/databases/tinysnb/catalog.kz
Binary file not shown.
Binary file modified dataset/databases/tinysnb/data.kz
Binary file not shown.
Binary file modified dataset/databases/tinysnb/metadata.kz
Binary file not shown.
Binary file modified dataset/databases/tinysnb/n-0.hindex
Binary file not shown.
Binary file modified dataset/databases/tinysnb/n-1.hindex
Binary file not shown.
Binary file modified dataset/databases/tinysnb/n-2.hindex
Binary file not shown.
Binary file modified dataset/databases/tinysnb/nodes.statistics_and_deleted.ids
Binary file not shown.
Binary file modified dataset/databases/tinysnb/rels.statistics
Binary file not shown.
2 changes: 1 addition & 1 deletion dataset/databases/tinysnb/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.4.2.5
0.4.2.6
2 changes: 1 addition & 1 deletion src/include/common/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ struct BufferPoolConstants {
static constexpr uint64_t DEFAULT_VM_REGION_MAX_SIZE = (uint64_t)1 << 43; // (8TB)
#endif

static constexpr uint64_t DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING = 1ull << 26; // (64MB)
static constexpr uint64_t DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING = 1ull << 27; // (128MB)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder which test fails due to bm size.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's the multi copy test (I've run into this before; that time it was caused by a bug), which I think writes a gigabyte or so of hash index data. But it doesn't need all that memory at one time, so it should be fine with a small buffer pool.

};

struct StorageConstants {
Expand Down
8 changes: 4 additions & 4 deletions src/include/common/type_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,15 @@ class TypeUtils {
static std::string relToString(const struct_entry_t& val, ValueVector* vector);

static inline void encodeOverflowPtr(uint64_t& overflowPtr, page_idx_t pageIdx,
uint16_t pageOffset) {
uint32_t pageOffset) {
memcpy(&overflowPtr, &pageIdx, 4);
memcpy(((uint8_t*)&overflowPtr) + 4, &pageOffset, 2);
memcpy(((uint8_t*)&overflowPtr) + 4, &pageOffset, 4);
}
static inline void decodeOverflowPtr(uint64_t overflowPtr, page_idx_t& pageIdx,
uint16_t& pageOffset) {
uint32_t& pageOffset) {
pageIdx = 0;
memcpy(&pageIdx, &overflowPtr, 4);
memcpy(&pageOffset, ((uint8_t*)&overflowPtr) + 4, 2);
memcpy(&pageOffset, ((uint8_t*)&overflowPtr) + 4, 4);
}

template<typename T>
Expand Down
5 changes: 5 additions & 0 deletions src/include/storage/compression/compression.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ union StorageValue {
static std::optional<StorageValue> readFromVector(const common::ValueVector& vector,
common::offset_t posInVector);
};
static_assert(std::is_trivial_v<StorageValue>);

// Returns the size of the data type in bytes
uint32_t getDataTypeSizeInChunk(const common::LogicalType& dataType);
Expand All @@ -114,6 +115,7 @@ struct CompressionMetadata {
StorageValue min;
StorageValue max;
CompressionType compression;
uint8_t _padding[7]{};

CompressionMetadata(StorageValue min, StorageValue max, CompressionType compression)
: min(min), max(max), compression(compression) {}
Expand All @@ -130,6 +132,9 @@ struct CompressionMetadata {

std::string toString(const common::PhysicalTypeID physicalType) const;
};
// Padding should be kept to a minimum, but must be declared explicitly so that the struct's
// binary representation is consistent when it is written to disk.
static_assert(sizeof(CompressionMetadata) == sizeof(StorageValue) * 2 + 8);

class CompressionAlg {
public:
Expand Down
35 changes: 19 additions & 16 deletions src/include/storage/index/hash_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,18 @@ class BufferManager;
class OverflowFileHandle;
template<typename T>
class DiskArray;
class DiskArrayCollection;

template<typename T>
class HashIndexLocalStorage;

class OnDiskHashIndex {
public:
virtual ~OnDiskHashIndex() = default;
virtual void prepareCommit() = 0;
virtual bool prepareCommit() = 0;
virtual void prepareRollback() = 0;
virtual void checkpointInMemory() = 0;
virtual void rollbackInMemory() = 0;
virtual bool checkpointInMemory() = 0;
virtual bool rollbackInMemory() = 0;
virtual void bulkReserve(uint64_t numValuesToAppend) = 0;
};

Expand All @@ -65,11 +66,11 @@ class OnDiskHashIndex {
// S is the stored type, which is usually the same as T, with the exception of strings
template<typename T>
class HashIndex final : public OnDiskHashIndex {

public:
HashIndex(const DBFileIDAndName& dbFileIDAndName,
const std::shared_ptr<BMFileHandle>& fileHandle, OverflowFileHandle* overflowFileHandle,
uint64_t indexPos, BufferManager& bufferManager, WAL* wal);
DiskArrayCollection& diskArrays, uint64_t indexPos, BufferManager& bufferManager, WAL* wal,
const HashIndexHeader& indexHeaderForReadTrx, HashIndexHeader& indexHeaderForWriteTrx);

~HashIndex() override;

Expand All @@ -86,10 +87,10 @@ class HashIndex final : public OnDiskHashIndex {
// or the index of the first value which cannot be inserted.
size_t append(const IndexBuffer<BufferKeyType>& buffer);

void prepareCommit() override;
bool prepareCommit() override;
void prepareRollback() override;
void checkpointInMemory() override;
void rollbackInMemory() override;
bool checkpointInMemory() override;
bool rollbackInMemory() override;
inline BMFileHandle* getFileHandle() const { return fileHandle.get(); }

private:
Expand Down Expand Up @@ -219,8 +220,8 @@ class HashIndex final : public OnDiskHashIndex {
std::unique_ptr<DiskArray<Slot<T>>> oSlots;
OverflowFileHandle* overflowFileHandle;
std::unique_ptr<HashIndexLocalStorage<T>> localStorage;
std::unique_ptr<HashIndexHeader> indexHeaderForReadTrx;
std::unique_ptr<HashIndexHeader> indexHeaderForWriteTrx;
const HashIndexHeader& indexHeaderForReadTrx;
HashIndexHeader& indexHeaderForWriteTrx;
};

template<>
Expand Down Expand Up @@ -317,19 +318,21 @@ class PrimaryKeyIndex {
static void createEmptyHashIndexFiles(common::PhysicalTypeID typeID, const std::string& fName,
common::VirtualFileSystem* vfs, main::ClientContext* context);

void writeHeaders();

private:
// When doing batch inserts, prepareCommit needs to be run before the COPY TABLE record is
// logged to the WAL file, since the index is reloaded when that record is replayed. However
// prepareCommit will also be run later, and the local storage can't cleared from the
// HashIndices until checkpointing is done, and entries will get added twice if
// HashIndex::prepareCommit is run twice. It seems simplest to just track whether or not
// prepareCommit has been run.
bool hasRunPrepareCommit;
common::PhysicalTypeID keyDataTypeID;
std::shared_ptr<BMFileHandle> fileHandle;
std::unique_ptr<OverflowFile> overflowFile;
std::vector<std::unique_ptr<OnDiskHashIndex>> hashIndices;
std::vector<HashIndexHeader> hashIndexHeadersForReadTrx;
std::vector<HashIndexHeader> hashIndexHeadersForWriteTrx;
BufferManager& bufferManager;
DBFileIDAndName dbFileIDAndName;
WAL& wal;
// Stores both primary and overflow slots
std::unique_ptr<DiskArrayCollection> hashIndexDiskArrays;
};

} // namespace storage
Expand Down
33 changes: 26 additions & 7 deletions src/include/storage/index/hash_index_header.h
Original file line number Diff line number Diff line change
@@ -1,20 +1,33 @@
#pragma once

#include "common/types/types.h"
#include "hash_index_slot.h"

namespace kuzu {
namespace storage {

// Fixed-width, on-disk representation of a hash index header. The trailing
// padding is declared explicitly (and zero-initialized) so that every byte of
// the struct has a defined value when it is written to disk.
struct HashIndexHeaderOnDisk {
explicit HashIndexHeaderOnDisk()
: nextSplitSlotId{0}, numEntries{0},
firstFreeOverflowSlotId{SlotHeader::INVALID_OVERFLOW_SLOT_ID}, currentLevel{0} {}
// Id of the next slot to split when resizing the hash index.
slot_id_t nextSplitSlotId;
// Total number of entries stored in the index.
uint64_t numEntries;
// Head of the chain of empty overflow slots reclaimed during slot splitting;
// INVALID_OVERFLOW_SLOT_ID when the chain is empty.
slot_id_t firstFreeOverflowSlotId;
// Current split level of the index (presumably drives the hash masks in
// HashIndexHeader — confirm against HashIndexHeader's constructor).
uint8_t currentLevel;
// Explicit padding so the struct's size and byte contents are deterministic.
uint8_t _padding[7]{};
};
// No hidden padding bits: every byte participates in the value, so the raw
// bytes written to disk are deterministic.
static_assert(std::has_unique_object_representations_v<HashIndexHeaderOnDisk>);

class HashIndexHeader {
public:
explicit HashIndexHeader(common::PhysicalTypeID keyDataTypeID)
explicit HashIndexHeader()
: currentLevel{1}, levelHashMask{1}, higherLevelHashMask{3}, nextSplitSlotId{0},
numEntries{0}, keyDataTypeID{keyDataTypeID},
firstFreeOverflowSlotId{SlotHeader::INVALID_OVERFLOW_SLOT_ID} {}
numEntries{0}, firstFreeOverflowSlotId{SlotHeader::INVALID_OVERFLOW_SLOT_ID} {}

// Used for element initialization in disk array only.
HashIndexHeader() : HashIndexHeader(common::PhysicalTypeID::STRING) {}
explicit HashIndexHeader(const HashIndexHeaderOnDisk& onDiskHeader)
: currentLevel{onDiskHeader.currentLevel}, levelHashMask{(1ull << this->currentLevel) - 1},
higherLevelHashMask{(1ull << (this->currentLevel + 1)) - 1},
nextSplitSlotId{onDiskHeader.nextSplitSlotId}, numEntries{onDiskHeader.numEntries},
firstFreeOverflowSlotId{onDiskHeader.firstFreeOverflowSlotId} {}

inline void incrementLevel() {
currentLevel++;
Expand All @@ -30,14 +43,20 @@ class HashIndexHeader {
}
}

inline void write(HashIndexHeaderOnDisk& onDiskHeader) const {
onDiskHeader.currentLevel = currentLevel;
onDiskHeader.nextSplitSlotId = nextSplitSlotId;
onDiskHeader.numEntries = numEntries;
onDiskHeader.firstFreeOverflowSlotId = firstFreeOverflowSlotId;
}

public:
uint64_t currentLevel;
uint64_t levelHashMask;
uint64_t higherLevelHashMask;
// Id of the next slot to split when resizing the hash index
slot_id_t nextSplitSlotId;
uint64_t numEntries;
common::PhysicalTypeID keyDataTypeID;
// Id of the first in a chain of empty overflow slots which have been reclaimed during slot
// splitting. The nextOvfSlotId field in the slot's header indicates the next slot in the chain.
// These slots should be used first when allocating new overflow slots
Expand Down
6 changes: 4 additions & 2 deletions src/include/storage/index/hash_index_slot.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once

#include <array>
#include <cstdint>

#include "common/constants.h"
Expand Down Expand Up @@ -43,13 +44,14 @@ class SlotHeader {
inline entry_pos_t numEntries() const { return std::popcount(validityMask); }

public:
uint8_t fingerprints[FINGERPRINT_CAPACITY];
std::array<uint8_t, FINGERPRINT_CAPACITY> fingerprints;
uint32_t validityMask;
slot_id_t nextOvfSlotId;
};

template<typename T>
struct SlotEntry {
SlotEntry() : key{}, value{} {}
T key;
common::offset_t value;

Expand All @@ -72,7 +74,7 @@ template<typename T>
struct Slot {
Slot() : header{}, entries{} {}
SlotHeader header;
SlotEntry<T> entries[getSlotCapacity<T>()];
std::array<SlotEntry<T>, getSlotCapacity<T>()> entries;
};

} // namespace storage
Expand Down
16 changes: 12 additions & 4 deletions src/include/storage/index/hash_index_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <cmath>

#include "common/constants.h"
#include "common/types/ku_string.h"
#include "common/types/types.h"
#include "function/hash/hash_functions.h"
Expand All @@ -13,12 +14,19 @@ namespace storage {
const uint64_t NUM_HASH_INDEXES_LOG2 = 8;
const uint64_t NUM_HASH_INDEXES = 1 << NUM_HASH_INDEXES_LOG2;

static constexpr common::page_idx_t INDEX_HEADER_ARRAY_HEADER_PAGE_IDX = 0;
static constexpr common::page_idx_t P_SLOTS_HEADER_PAGE_IDX = 1;
static constexpr common::page_idx_t O_SLOTS_HEADER_PAGE_IDX = 2;
static constexpr common::page_idx_t NUM_HEADER_PAGES = 3;
static constexpr common::page_idx_t INDEX_HEADER_PAGES = 2;
static constexpr uint64_t INDEX_HEADERS_PER_PAGE =
common::BufferPoolConstants::PAGE_4KB_SIZE / sizeof(HashIndexHeaderOnDisk);

static constexpr common::page_idx_t P_SLOTS_HEADER_PAGE_IDX = 0;
static constexpr common::page_idx_t O_SLOTS_HEADER_PAGE_IDX = 1;
static constexpr common::page_idx_t NUM_HEADER_PAGES = 2;
static constexpr uint64_t INDEX_HEADER_IDX_IN_ARRAY = 0;

// So that all 256 hash indexes' headers can be stored in two 4KB pages, HashIndexHeaderOnDisk
// must be at most 32 bytes.
static_assert(NUM_HASH_INDEXES * sizeof(HashIndexHeaderOnDisk) <= 4096 * INDEX_HEADER_PAGES);

enum class SlotType : uint8_t { PRIMARY = 0, OVF = 1 };

struct SlotInfo {
Expand Down
8 changes: 4 additions & 4 deletions src/include/storage/stats/metadata_dah_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@ namespace storage {

// DAH is the abbreviation for Disk Array Header.
struct MetadataDAHInfo {
common::page_idx_t dataDAHPageIdx = common::INVALID_PAGE_IDX;
common::page_idx_t nullDAHPageIdx = common::INVALID_PAGE_IDX;
common::page_idx_t dataDAHIdx = common::INVALID_PAGE_IDX;
common::page_idx_t nullDAHIdx = common::INVALID_PAGE_IDX;
std::vector<std::unique_ptr<MetadataDAHInfo>> childrenInfos;

MetadataDAHInfo() : MetadataDAHInfo{common::INVALID_PAGE_IDX, common::INVALID_PAGE_IDX} {}
explicit MetadataDAHInfo(common::page_idx_t dataDAHPageIdx)
: MetadataDAHInfo{dataDAHPageIdx, common::INVALID_PAGE_IDX} {}
MetadataDAHInfo(common::page_idx_t dataDAHPageIdx, common::page_idx_t nullDAHPageIdx)
: dataDAHPageIdx{dataDAHPageIdx}, nullDAHPageIdx{nullDAHPageIdx} {}
MetadataDAHInfo(common::page_idx_t dataDAHIdx, common::page_idx_t nullDAHIdx)
: dataDAHIdx{dataDAHIdx}, nullDAHIdx{nullDAHIdx} {}

std::unique_ptr<MetadataDAHInfo> copy();

Expand Down
7 changes: 3 additions & 4 deletions src/include/storage/stats/node_table_statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@
#include <set>

#include "common/vector/value_vector.h"
#include "storage/buffer_manager/buffer_manager.h"
#include "storage/stats/metadata_dah_info.h"
#include "storage/stats/table_statistics.h"
#include "storage/wal/wal.h"
#include "storage/storage_structure/disk_array_collection.h"

namespace kuzu {
namespace common {
Expand All @@ -19,8 +18,8 @@ namespace storage {

class NodeTableStatsAndDeletedIDs : public TableStatistics {
public:
NodeTableStatsAndDeletedIDs(BMFileHandle* metadataFH, const catalog::TableCatalogEntry& entry,
BufferManager* bufferManager, WAL* wal);
NodeTableStatsAndDeletedIDs(DiskArrayCollection& metadataDAC,
const catalog::TableCatalogEntry& entry);
NodeTableStatsAndDeletedIDs(common::table_id_t tableID, common::offset_t maxNodeOffset,
const std::vector<common::offset_t>& deletedNodeOffsets);
NodeTableStatsAndDeletedIDs(const NodeTableStatsAndDeletedIDs& other);
Expand Down
7 changes: 4 additions & 3 deletions src/include/storage/stats/nodes_store_statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@
namespace kuzu {
namespace storage {

class DiskArrayCollection;

// Manages the disk image of the maxNodeOffsets and deleted node IDs (per node table).
// Note: This class is *not* thread-safe.
class NodesStoreStatsAndDeletedIDs : public TablesStatistics {
public:
// Should be used when an already loaded database is started from a directory.
NodesStoreStatsAndDeletedIDs(const std::string& databasePath, BMFileHandle* metadataFH,
NodesStoreStatsAndDeletedIDs(const std::string& databasePath, DiskArrayCollection& metadataDAC,
BufferManager* bufferManager, WAL* wal, common::VirtualFileSystem* fs,
main::ClientContext* context);

Expand Down Expand Up @@ -44,8 +46,7 @@ class NodesStoreStatsAndDeletedIDs : public TablesStatistics {
protected:
std::unique_ptr<TableStatistics> constructTableStatistic(
catalog::TableCatalogEntry* tableEntry) override {
return std::make_unique<NodeTableStatsAndDeletedIDs>(metadataFH, *tableEntry, bufferManager,
wal);
return std::make_unique<NodeTableStatsAndDeletedIDs>(metadataDAC, *tableEntry);
}

std::string getTableStatisticsFilePath(const std::string& directory,
Expand Down
6 changes: 3 additions & 3 deletions src/include/storage/stats/rel_table_statistics.h
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
#pragma once

#include "common/enums/rel_direction.h"
#include "storage/buffer_manager/buffer_manager.h"
#include "storage/stats/metadata_dah_info.h"
#include "storage/stats/table_statistics.h"

namespace kuzu {
namespace storage {

class DiskArrayCollection;

class WAL;
class RelTableStats : public TableStatistics {
public:
RelTableStats(BMFileHandle* metadataFH, const catalog::TableCatalogEntry& tableEntry,
BufferManager* bufferManager, WAL* wal);
RelTableStats(DiskArrayCollection& metadataDAC, const catalog::TableCatalogEntry& tableEntry);
RelTableStats(uint64_t numRels, common::table_id_t tableID, common::offset_t nextRelOffset)
: TableStatistics{common::TableType::REL, numRels, tableID}, nextRelOffset{nextRelOffset} {}

Expand Down
6 changes: 4 additions & 2 deletions src/include/storage/stats/rels_store_statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@
namespace kuzu {
namespace storage {

class DiskArrayCollection;

// Manages the disk image of the numRels and numRelsPerDirectionBoundTable.
class RelsStoreStats : public TablesStatistics {
public:
// Should be used when an already loaded database is started from a directory.
RelsStoreStats(const std::string& databasePath, BMFileHandle* metadataFH,
RelsStoreStats(const std::string& databasePath, DiskArrayCollection& metadataDAC,
BufferManager* bufferManager, WAL* wal, common::VirtualFileSystem* vfs,
main::ClientContext* context);

Expand Down Expand Up @@ -43,7 +45,7 @@ class RelsStoreStats : public TablesStatistics {
protected:
std::unique_ptr<TableStatistics> constructTableStatistic(
catalog::TableCatalogEntry* tableEntry) override {
return std::make_unique<RelTableStats>(metadataFH, *tableEntry, bufferManager, wal);
return std::make_unique<RelTableStats>(metadataDAC, *tableEntry);
}

std::string getTableStatisticsFilePath(const std::string& directory,
Expand Down
Loading
Loading