Skip to content

Commit

Permalink
Merge pull request #2948 from kuzudb/add-max-db-size
Browse files Browse the repository at this point in the history
Add database config: max db size
  • Loading branch information
ray6080 committed Feb 27, 2024
2 parents f12ae4c + c7a8785 commit ab6a142
Show file tree
Hide file tree
Showing 16 changed files with 152 additions and 75 deletions.
4 changes: 2 additions & 2 deletions src/c_api/database.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,6 @@ void kuzu_database_set_logging_level(const char* logging_level) {
}

// Returns the default system configuration handed out by the C API.
// Fields are listed in declaration order of `kuzu_system_config`; the last one is the
// maximum database size, set to `BufferPoolConstants::DEFAULT_VM_REGION_MAX_SIZE`.
// A single return statement initializes every field, so `max_db_size` is never left
// zero-initialized.
kuzu_system_config kuzu_default_system_config() {
    return {0 /*bufferPoolSize*/, 0 /*maxNumThreads*/, true /*enableCompression*/,
        false /*readOnly*/, BufferPoolConstants::DEFAULT_VM_REGION_MAX_SIZE /*maxDBSize*/};
}
5 changes: 5 additions & 0 deletions src/include/c_api/kuzu.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,11 @@ typedef struct {
// If true, open the database in read-only mode. No write transaction is allowed on the Database
// object. If false, open the database read-write.
bool read_only;
// The maximum size of the database in bytes. Note that this is introduced temporarily for now
// to get around the default 8TB mmap address space limit under some environments. This
// will be removed once we implement a better solution later. The value defaults to 1 << 43
// (8TB) under 64-bit environments and 1GB under 32-bit ones (see `DEFAULT_VM_REGION_MAX_SIZE`).
uint64_t max_db_size;
} kuzu_system_config;

/**
Expand Down
14 changes: 10 additions & 4 deletions src/include/main/database.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,26 @@ struct KUZU_API SystemConfig {
* @param bufferPoolSize Max size of the buffer pool in bytes.
* The larger the buffer pool, the more data from the database files is kept in memory,
* reducing the amount of File I/O
* @param maxNumThreads The maximum number of threads to use during query execution
* @param enableCompression Whether or not to compress data on-disk for supported types
* @param readOnly If true, the database is opened read-only. No write transaction is
* @param maxNumThreads The maximum number of threads to use during query execution
* @param enableCompression Whether or not to compress data on-disk for supported types
* @param readOnly If true, the database is opened read-only. No write transaction is
* allowed on the `Database` object. Multiple read-only `Database` objects can be created with
* the same database path. If false, the database is opened read-write. Under this mode,
* there must not be multiple `Database` objects created with the same database path.
* @param maxDBSize The maximum size of the database in bytes. Note that this is introduced
* temporarily for now to get around the default 8TB mmap address space limit under some
* environments. This will be removed once we implement a better solution later. The value
* defaults to 1 << 43 (8TB) under 64-bit environments and 1GB under 32-bit ones (see
* `DEFAULT_VM_REGION_MAX_SIZE`).
*/
explicit SystemConfig(uint64_t bufferPoolSize = -1u, uint64_t maxNumThreads = 0,
bool enableCompression = true, bool readOnly = false);
bool enableCompression = true, bool readOnly = false, uint64_t maxDBSize = -1u);

uint64_t bufferPoolSize;
uint64_t maxNumThreads;
bool enableCompression;
bool readOnly;
uint64_t maxDBSize;
};

/**
Expand Down
4 changes: 3 additions & 1 deletion src/include/storage/buffer_manager/buffer_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ class BufferManager {
public:
enum class PageReadPolicy : uint8_t { READ_PAGE = 0, DONT_READ_PAGE = 1 };

explicit BufferManager(uint64_t bufferPoolSize);
BufferManager(uint64_t bufferPoolSize, uint64_t maxDBSize);
~BufferManager() = default;

uint8_t* pin(BMFileHandle& fileHandle, common::page_idx_t pageIdx,
Expand Down Expand Up @@ -194,6 +194,8 @@ class BufferManager {
inline void clearEvictionQueue() { evictionQueue = std::make_unique<EvictionQueue>(); }

private:
static void verifySizeParams(uint64_t bufferPoolSize, uint64_t maxDBSize);

bool claimAFrame(
BMFileHandle& fileHandle, common::page_idx_t pageIdx, PageReadPolicy pageReadPolicy);
// Return number of bytes freed.
Expand Down
11 changes: 8 additions & 3 deletions src/main/database.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ using namespace kuzu::transaction;
namespace kuzu {
namespace main {

SystemConfig::SystemConfig(
uint64_t bufferPoolSize_, uint64_t maxNumThreads, bool enableCompression, bool readOnly)
SystemConfig::SystemConfig(uint64_t bufferPoolSize_, uint64_t maxNumThreads, bool enableCompression,
bool readOnly, uint64_t maxDBSize)
: maxNumThreads{maxNumThreads}, enableCompression{enableCompression}, readOnly(readOnly) {
if (bufferPoolSize_ == -1u || bufferPoolSize_ == 0) {
#if defined(_WIN32)
Expand All @@ -55,6 +55,10 @@ SystemConfig::SystemConfig(
if (maxNumThreads == 0) {
this->maxNumThreads = std::thread::hardware_concurrency();
}
if (maxDBSize == -1u) {
maxDBSize = BufferPoolConstants::DEFAULT_VM_REGION_MAX_SIZE;
}
this->maxDBSize = maxDBSize;
}

static void getLockFileFlagsAndType(bool readOnly, bool createNew, int& flags, FileLockType& lock) {
Expand All @@ -70,7 +74,8 @@ Database::Database(std::string_view databasePath, SystemConfig systemConfig)
initLoggers();
logger = LoggerUtils::getLogger(LoggerConstants::LoggerEnum::DATABASE);
vfs = std::make_unique<VirtualFileSystem>();
bufferManager = std::make_unique<BufferManager>(this->systemConfig.bufferPoolSize);
bufferManager = std::make_unique<BufferManager>(
this->systemConfig.bufferPoolSize, this->systemConfig.maxDBSize);
memoryManager = std::make_unique<MemoryManager>(bufferManager.get(), vfs.get());
queryProcessor = std::make_unique<processor::QueryProcessor>(this->systemConfig.maxNumThreads);
initDBDirAndCoreFilesIfNecessary();
Expand Down
24 changes: 18 additions & 6 deletions src/storage/buffer_manager/buffer_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,18 +65,30 @@ void EvictionQueue::removeCandidatesForFile(kuzu::storage::BMFileHandle& fileHan
}
}

// Constructs the buffer manager.
// @param bufferPoolSize Size in bytes given to the 256KB-page VM region; also stored as the
//        buffer pool size.
// @param maxDBSize Size in bytes given to the 4KB-page VM region.
// Both values are validated by `verifySizeParams` before any region is created, so an invalid
// configuration throws a `BufferManagerException` without reserving anything.
BufferManager::BufferManager(uint64_t bufferPoolSize, uint64_t maxDBSize)
    : usedMemory{0}, bufferPoolSize{bufferPoolSize}, numEvictionQueueInsertions{0} {
    verifySizeParams(bufferPoolSize, maxDBSize);
    vmRegions.resize(2);
    vmRegions[0] = std::make_unique<VMRegion>(PageSizeClass::PAGE_4KB, maxDBSize);
    vmRegions[1] = std::make_unique<VMRegion>(PageSizeClass::PAGE_256KB, bufferPoolSize);
    evictionQueue = std::make_unique<EvictionQueue>();
}

// Validates the two size parameters of the buffer manager and throws a
// `BufferManagerException` on the first violated rule. Checks run in this order:
//   1. `bufferPoolSize` must be at least one 4KB page.
//   2. `maxDBSize` must be at least PAGE_4KB_SIZE * PAGE_GROUP_SIZE bytes.
//   3. `maxDBSize` must be a power of 2.
void BufferManager::verifySizeParams(uint64_t bufferPoolSize, uint64_t maxDBSize) {
    const auto minDBSize = BufferPoolConstants::PAGE_4KB_SIZE * StorageConstants::PAGE_GROUP_SIZE;
    if (bufferPoolSize < BufferPoolConstants::PAGE_4KB_SIZE) {
        throw BufferManagerException("The given buffer pool size should be at least 4KB.");
    }
    if (maxDBSize < minDBSize) {
        throw BufferManagerException(
            "The given max db size should be at least " + std::to_string(minDBSize) + " bytes.");
    }
    // A power of two has exactly one bit set, so clearing its lowest set bit yields zero.
    // maxDBSize is known to be non-zero here because it passed the minimum-size check.
    const bool isPowerOfTwo = (maxDBSize & (maxDBSize - 1)) == 0;
    if (!isPowerOfTwo) {
        throw BufferManagerException("The given max db size should be a power of 2.");
    }
}

// Important Note: Pin returns a raw pointer to the frame. This is potentially very dangerous and
// trusts the caller is going to protect this memory space.
// Important responsibilities for the caller are:
Expand Down
3 changes: 2 additions & 1 deletion test/ddl/e2e_ddl_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ class TinySnbDDLTest : public DBTest {
catalog = getCatalog(*database);
profiler = std::make_unique<Profiler>();
bufferManager = std::make_unique<BufferManager>(
BufferPoolConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING);
BufferPoolConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING,
BufferPoolConstants::DEFAULT_VM_REGION_MAX_SIZE);
memoryManager =
std::make_unique<MemoryManager>(bufferManager.get(), getFileSystem(*database));
executionContext =
Expand Down
2 changes: 1 addition & 1 deletion test/main/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ if(MSVC)
udf_test.cpp)
else()
add_kuzu_api_test(main_test
access_mode_test.cpp
system_config_test.cpp
arrow_test.cpp
connection_test.cpp
db_locking_test.cpp
Expand Down
30 changes: 0 additions & 30 deletions test/main/access_mode_test.cpp

This file was deleted.

69 changes: 69 additions & 0 deletions test/main/system_config_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#include "common/exception/buffer_manager.h"
#include "main_test_helper/main_test_helper.h"

using namespace kuzu::common;
using namespace kuzu::testing;
using namespace kuzu::main;

class SystemConfigTest : public ApiTest {};

// Asserts that `result` reports success; on failure the result's string form is attached to
// the assertion message.
void assertQuery(QueryResult& result) {
    ASSERT_TRUE(result.isSuccess()) << result.toString();
}

// Opens the database read-write, creates and populates a table, then reopens the same path
// read-only and checks that a write statement is rejected while a read statement still runs.
TEST_F(SystemConfigTest, testAccessMode) {
    systemConfig->readOnly = false;
    auto db = std::make_unique<Database>(databasePath, *systemConfig);
    auto con = std::make_unique<Connection>(db.get());
    assertQuery(
        *con->query("CREATE NODE TABLE Person1(name STRING, age INT64, PRIMARY KEY(name))"));
    assertQuery(*con->query("CREATE (:Person1 {name: 'Alice', age: 25})"));
    assertQuery(*con->query("MATCH (:Person1) RETURN COUNT(*)"));
    // Release the read-write instance before reopening the same path read-only.
    db.reset();
    systemConfig->readOnly = true;
    std::unique_ptr<Database> db2;
    std::unique_ptr<Connection> con2;
    // Fatal assertions: con2 is dereferenced below, so stop here if construction threw.
    ASSERT_NO_THROW(db2 = std::make_unique<Database>(databasePath, *systemConfig));
    ASSERT_NO_THROW(con2 = std::make_unique<Connection>(db2.get()));
    // Target the table created above so the DROP is rejected because of read-only mode rather
    // than because the table name does not exist.
    ASSERT_FALSE(con2->query("DROP TABLE Person1")->isSuccess());
    EXPECT_NO_THROW(con2->query("MATCH (:Person1) RETURN COUNT(*)"));
}

// Exercises the `maxDBSize` validation performed when a Database is constructed.
TEST_F(SystemConfigTest, testMaxDBSize) {
    // Below the minimum size: construction must throw.
    systemConfig->maxDBSize = 1024;
    try {
        auto db = std::make_unique<Database>(databasePath, *systemConfig);
        FAIL() << "Expected BufferManagerException for a max db size below the minimum.";
    } catch (const BufferManagerException& e) {
        ASSERT_EQ(std::string(e.what()),
            "Buffer manager exception: The given max db size should be at least 4194304 bytes.");
    }
    // Large enough but not a power of 2: construction must throw.
    systemConfig->maxDBSize = 4194305;
    try {
        auto db = std::make_unique<Database>(databasePath, *systemConfig);
        FAIL() << "Expected BufferManagerException for a max db size that is not a power of 2.";
    } catch (const BufferManagerException& e) {
        ASSERT_EQ(std::string(e.what()),
            "Buffer manager exception: The given max db size should be a power of 2.");
    }
    // Exactly the minimum size: a successful open is accepted; if construction throws, it must
    // be with the frame-group allocator message below.
    systemConfig->maxDBSize = 4194304;
    try {
        auto db = std::make_unique<Database>(databasePath, *systemConfig);
    } catch (const BufferManagerException& e) {
        ASSERT_EQ(std::string(e.what()),
            "Buffer manager exception: No more frame groups can be added to the allocator.");
    }
    // 1GB: construction must succeed.
    systemConfig->maxDBSize = 1ull << 30;
    EXPECT_NO_THROW(auto db = std::make_unique<Database>(databasePath, *systemConfig));
}

// Exercises the `bufferPoolSize` validation performed when a Database is constructed.
TEST_F(SystemConfigTest, testBufferPoolSize) {
    // Smaller than one 4KB page: construction must throw.
    systemConfig->bufferPoolSize = 1024;
    try {
        auto db = std::make_unique<Database>(databasePath, *systemConfig);
        FAIL() << "Expected BufferManagerException for a buffer pool smaller than 4KB.";
    } catch (const BufferManagerException& e) {
        ASSERT_EQ(std::string(e.what()),
            "Buffer manager exception: The given buffer pool size should be at least 4KB.");
    }
    // The size used throughout the test suite: construction must succeed.
    systemConfig->bufferPoolSize = BufferPoolConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING;
    EXPECT_NO_THROW(auto db = std::make_unique<Database>(databasePath, *systemConfig));
}
3 changes: 2 additions & 1 deletion test/transaction/transaction_manager_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ class TransactionManagerTest : public EmptyDBTest {
std::filesystem::create_directory(databasePath);
createDBAndConn();
bufferManager = getBufferManager(*database);
std::make_unique<BufferManager>(BufferPoolConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING);
std::make_unique<BufferManager>(BufferPoolConstants::DEFAULT_BUFFER_POOL_SIZE_FOR_TESTING,
BufferPoolConstants::DEFAULT_VM_REGION_MAX_SIZE);
wal = std::make_unique<WAL>(
databasePath, false /* readOnly */, *bufferManager, getFileSystem(*database));
transactionManager =
Expand Down
1 change: 1 addition & 0 deletions tools/nodejs_api/src_cpp/include/node_database.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class NodeDatabase : public Napi::ObjectWrap<NodeDatabase> {
size_t bufferPoolSize;
bool enableCompression;
bool readOnly;
uint64_t maxDBSize;
std::shared_ptr<Database> database;
};

Expand Down
2 changes: 1 addition & 1 deletion tools/python_api/src_cpp/include/py_database.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class PyDatabase {
static void initialize(py::handle& m);

explicit PyDatabase(const std::string& databasePath, uint64_t bufferPoolSize,
uint64_t maxNumThreads, bool compression, bool readOnly);
uint64_t maxNumThreads, bool compression, bool readOnly, uint64_t maxDBSize);

~PyDatabase();

Expand Down
19 changes: 9 additions & 10 deletions tools/python_api/src_cpp/py_database.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ using namespace kuzu::common;

void PyDatabase::initialize(py::handle& m) {
py::class_<PyDatabase>(m, "Database")
.def(py::init<const std::string&, uint64_t, uint64_t, bool, bool>(),
.def(py::init<const std::string&, uint64_t, uint64_t, bool, bool, uint64_t>(),
py::arg("database_path"), py::arg("buffer_pool_size") = 0,
py::arg("max_num_threads") = 0, py::arg("compression") = true,
py::arg("read_only") = false)
py::arg("read_only") = false, py::arg("max_db_size") = (uint64_t)1 << 43)
.def("set_logging_level", &PyDatabase::setLoggingLevel, py::arg("logging_level"))
.def("scan_node_table_as_int64", &PyDatabase::scanNodeTable<std::int64_t>,
py::arg("table_name"), py::arg("prop_name"), py::arg("indices"), py::arg("np_array"),
Expand All @@ -23,19 +23,18 @@ void PyDatabase::initialize(py::handle& m) {
.def("scan_node_table_as_int16", &PyDatabase::scanNodeTable<std::int16_t>,
py::arg("table_name"), py::arg("prop_name"), py::arg("indices"), py::arg("np_array"),
py::arg("num_threads"))
.def("scan_node_table_as_double", &PyDatabase::scanNodeTable<double>,
py::arg("table_name"), py::arg("prop_name"), py::arg("indices"), py::arg("np_array"),
py::arg("num_threads"))
.def("scan_node_table_as_float", &PyDatabase::scanNodeTable<float>,
py::arg("table_name"), py::arg("prop_name"), py::arg("indices"), py::arg("np_array"),
py::arg("num_threads"))
.def("scan_node_table_as_double", &PyDatabase::scanNodeTable<double>, py::arg("table_name"),
py::arg("prop_name"), py::arg("indices"), py::arg("np_array"), py::arg("num_threads"))
.def("scan_node_table_as_float", &PyDatabase::scanNodeTable<float>, py::arg("table_name"),
py::arg("prop_name"), py::arg("indices"), py::arg("np_array"), py::arg("num_threads"))
.def("scan_node_table_as_bool", &PyDatabase::scanNodeTable<bool>, py::arg("table_name"),
py::arg("prop_name"), py::arg("indices"), py::arg("np_array"), py::arg("num_threads"));
}

PyDatabase::PyDatabase(const std::string& databasePath, uint64_t bufferPoolSize,
uint64_t maxNumThreads, bool compression, bool readOnly) {
auto systemConfig = SystemConfig(bufferPoolSize, maxNumThreads, compression, readOnly);
uint64_t maxNumThreads, bool compression, bool readOnly, uint64_t maxDBSize) {
auto systemConfig =
SystemConfig(bufferPoolSize, maxNumThreads, compression, readOnly, maxDBSize);
database = std::make_unique<Database>(databasePath, systemConfig);
database->addBuiltInFunction(READ_PANDAS_FUNC_NAME, kuzu::PandasScanFunction::getFunctionSet());
storageDriver = std::make_unique<kuzu::main::StorageDriver>(database.get());
Expand Down
25 changes: 10 additions & 15 deletions tools/python_api/src_py/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class Database:
"""

def __init__(self, database_path, buffer_pool_size=0, max_num_threads=0, compression=True, lazy_init=False,
read_only=False):
read_only=False, max_db_size= 1 << 43):
"""
Parameters
----------
Expand All @@ -33,12 +33,20 @@ def __init__(self, database_path, buffer_pool_size=0, max_num_threads=0, compres
database path.
Default to False.
max_db_size : int
The maximum size of the database in bytes. Note that this is introduced
temporarily for now to get around the default 8TB mmap address
space limit under some environments. This will be removed once we implement
a better solution later. The value defaults to 1 << 43 (8TB) under 64-bit
environments and 1GB under 32-bit ones.
"""
self.database_path = database_path
self.buffer_pool_size = buffer_pool_size
self.max_num_threads = max_num_threads
self.compression = compression
self.read_only = read_only
self.max_db_size = max_db_size
self._database = None
if not lazy_init:
self.init_database()
Expand All @@ -60,20 +68,7 @@ def init_database(self):
if self._database is None:
self._database = _kuzu.Database(self.database_path,
self.buffer_pool_size, self.max_num_threads, self.compression,
self.read_only)

def resize_buffer_manager(self, new_size):
"""
Resize the maximum size of buffer pool.
Parameters
----------
new_size : int
New maximum size of buffer pool (in bytes).
"""

self._database.resize_buffer_manager(new_size)
self.read_only, self.max_db_size)

def set_logging_level(self, level):
"""
Expand Down
11 changes: 11 additions & 0 deletions tools/python_api/test/test_exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,17 @@ def test_read_only_exception(establish_connection):
with pytest.raises(RuntimeError, match="Cannot execute write operations in a read-only database!"):
conn.execute("CREATE NODE TABLE test (id INT64, PRIMARY KEY(id));")

def test_max_db_size_exception():
    """Opening a database with an invalid ``max_db_size`` raises ``RuntimeError``."""
    too_small = "Buffer manager exception: The given max db size should be at least 4194304 bytes."
    not_power_of_two = "Buffer manager exception: The given max db size should be a power of 2."
    # Each case pairs a rejected size with the message pattern the error must match.
    for size, expected in ((1024, too_small), (4194305, not_power_of_two)):
        with pytest.raises(RuntimeError, match=expected):
            kuzu.Database("test.db", max_db_size=size)

def test_buffer_pool_size_exception():
    """Opening a database with a 1024-byte buffer pool raises ``RuntimeError``."""
    expected = "Buffer manager exception: The given buffer pool size should be at least 4KB."
    with pytest.raises(RuntimeError, match=expected):
        kuzu.Database("test.db", buffer_pool_size=1024)

def test_query_exception(establish_connection):
conn, db = establish_connection
with pytest.raises(RuntimeError, match="Binder exception: Table nonexisting does not exist."):
Expand Down

0 comments on commit ab6a142

Please sign in to comment.