diff --git a/.github/workflows/sanity_check.yml b/.github/workflows/sanity_check.yml
index f16b79d4d..5ca59c378 100644
--- a/.github/workflows/sanity_check.yml
+++ b/.github/workflows/sanity_check.yml
@@ -34,8 +34,8 @@ jobs:
with:
args: https://raw.githubusercontent.com/llvm/llvm-project/release/12.x/clang/tools/clang-format/clang-format-diff.py
- - name: Check format
- run: VERBOSE_CHECK=1 make check-format
+ #- name: Check format
+ #run: VERBOSE_CHECK=1 make check-format
- name: Compare buckify output
run: make check-buck-targets
diff --git a/.gitignore b/.gitignore
index 489ad62a5..66cdac58b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@ rocksdb.pc
*.dylib*
*.gcda
*.gcno
+*.log
*.o
*.o.tmp
*.so
@@ -25,6 +26,7 @@ rocksdb.pc
*.vcxproj
*.vcxproj.filters
*.sln
+*.sst
*.cmake
.watchmanconfig
CMakeCache.txt
@@ -94,3 +96,4 @@ fuzz/crash-*
cmake-build-*
third-party/folly/
+*_dbg
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..791e51fd9
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "sideplugin/rockside"]
+ path = sideplugin/rockside
+ url = https://github.com/topling/rockside.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f43d668bd..e0b14952e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -175,6 +175,8 @@ else()
endif()
endif()
+include_directories(sideplugin/rockside/src)
+
option(WITH_MD_LIBRARY "build with MD" ON)
if(WIN32 AND MSVC)
if(WITH_MD_LIBRARY)
@@ -185,6 +187,11 @@ if(WIN32 AND MSVC)
endif()
if(MSVC)
+ if(MSVC_VERSION LESS 1926)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /experimental:preprocessor")
+ else()
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zc:preprocessor")
+ endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi /nologo /EHsc /GS /Gd /GR /GF /fp:precise /Zc:wchar_t /Zc:forScope /errorReport:queue")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /FC /d2Zi+ /W4 /wd4127 /wd4800 /wd4996 /wd4351 /wd4100 /wd4204 /wd4324")
else()
@@ -592,8 +599,40 @@ endif()
find_package(Threads REQUIRED)
# Main library source code
+if (EXISTS ${PROJECT_SOURCE_DIR}/sideplugin/topling-rocks/CMakeFileList.txt)
+ message(STATUS "found ${PROJECT_SOURCE_DIR}/sideplugin/topling-rocks/CMakeFileList.txt")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+ include(${PROJECT_SOURCE_DIR}/sideplugin/topling-rocks/CMakeFileList.txt)
+else()
+ message(STATUS "not found ${PROJECT_SOURCE_DIR}/sideplugin/topling-rocks/CMakeFileList.txt")
+endif()
+
+if (EXISTS ${PROJECT_SOURCE_DIR}/sideplugin/rockside/CMakeFileList.txt)
+ message(STATUS "found ${PROJECT_SOURCE_DIR}/sideplugin/rockside/CMakeFileList.txt")
+ include(${PROJECT_SOURCE_DIR}/sideplugin/rockside/CMakeFileList.txt)
+else()
+ message(STATUS "not found ${PROJECT_SOURCE_DIR}/sideplugin/rockside/CMakeFileList.txt")
+endif()
+
+set (cspp_memtab ${PROJECT_SOURCE_DIR}/sideplugin/cspp-memtable/cspp_memtable.cc)
+if (EXISTS ${cspp_memtab})
+ message(STATUS "found ${cspp_memtab}")
+ set (topling_rocks_src ${topling_rocks_src} ${cspp_memtab})
+else()
+ message(STATUS "not found ${cspp_memtab}")
+endif()
+
+set (cspp_wbwi ${PROJECT_SOURCE_DIR}/sideplugin/cspp-wbwi/cspp_wbwi.cc)
+if (EXISTS ${cspp_wbwi})
+ message(STATUS "found ${cspp_wbwi}")
+ set (topling_rocks_src ${topling_rocks_src} ${cspp_wbwi})
+else()
+ message(STATUS "not found ${cspp_wbwi}")
+endif()
set(SOURCES
+ ${rockside_src}
+ ${topling_rocks_src}
cache/cache.cc
cache/cache_entry_roles.cc
cache/cache_key.cc
@@ -621,6 +660,7 @@ set(SOURCES
db/builder.cc
db/c.cc
db/column_family.cc
+ db/compaction/compaction_executor.cc
db/compaction/compaction.cc
db/compaction/compaction_iterator.cc
db/compaction/compaction_picker.cc
diff --git a/COPYING b/COPYING
index d159169d1..efc5ad579 100644
--- a/COPYING
+++ b/COPYING
@@ -1,3 +1,8 @@
+Copyright (c) 2021 The ToplingDB Authors. All rights reserved.
+
+We disallow bytedance using this software, other terms are identical with
+GPLv2 License, see below:
+---------------------------------------------------------------------------
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
diff --git a/LICENSE.Apache b/LICENSE.Apache
index d64569567..60939d8bc 100644
--- a/LICENSE.Apache
+++ b/LICENSE.Apache
@@ -1,3 +1,8 @@
+Copyright (c) 2021 The ToplingDB Authors. All rights reserved.
+
+We disallow bytedance using this software, other terms are identical with
+Apache License, see below:
+---------------------------------------------------------------------------
Apache License
Version 2.0, January 2004
diff --git a/LICENSE.leveldb b/LICENSE.leveldb
index 7108b0bfb..a9f6bb5a5 100644
--- a/LICENSE.leveldb
+++ b/LICENSE.leveldb
@@ -1,3 +1,9 @@
+Copyright (c) 2021 The ToplingDB Authors. All rights reserved.
+
+We disallow bytedance using this software, other terms are identical with
+original license, see below:
+---------------------------------------------------------------------------
+
This contains code that is from LevelDB, and that code is under the following license:
Copyright (c) 2011 The LevelDB Authors. All rights reserved.
diff --git a/Makefile b/Makefile
index 4a39fe09a..7cd901b74 100644
--- a/Makefile
+++ b/Makefile
@@ -18,6 +18,15 @@ MACHINE ?= $(shell uname -m)
ARFLAGS = ${EXTRA_ARFLAGS} rs
STRIPFLAGS = -S -x
+# beg topling specific
+DISABLE_WARNING_AS_ERROR=1
+LIB_MODE=shared
+USE_RTTI=1
+ROCKSDB_USE_IO_URING=0
+ROCKSDB_DISABLE_TCMALLOC=1
+SKIP_FORMAT_BUCK_CHECKS=1
+# end topling specific
+
# Transform parallel LOG output into something more readable.
perl_command = perl -n \
-e '@a=split("\t",$$_,-1); $$t=$$a[8];' \
@@ -109,6 +118,9 @@ endif
# In that case, the compiler default (`-O0` for gcc and clang) will be used.
OPT += $(OPTIMIZE_LEVEL)
+ifeq ($(WITH_FRAME_POINTER),1)
+OPT += -fno-omit-frame-pointer
+else
# compile with -O2 if debug level is not 2
ifneq ($(DEBUG_LEVEL), 2)
OPT += -fno-omit-frame-pointer
@@ -117,6 +129,7 @@ ifeq (,$(shell $(CXX) -fsyntax-only -momit-leaf-frame-pointer -xc /dev/null 2>&1
OPT += -momit-leaf-frame-pointer
endif
endif
+endif
ifeq (,$(shell $(CXX) -fsyntax-only -maltivec -xc /dev/null 2>&1))
CXXFLAGS += -DHAS_ALTIVEC
@@ -188,6 +201,203 @@ endif
#-----------------------------------------------
include src.mk
+# ROCKSDB_NO_DYNAMIC_EXTENSION makes dll load twice, disable it
+CXXFLAGS += -DROCKSDB_NO_DYNAMIC_EXTENSION
+
+# civetweb show server stats
+CXXFLAGS += -DUSE_SERVER_STATS=1
+CFLAGS += -DUSE_SERVER_STATS=1
+
+# civetweb-v1.15 requires OPENSSL_API_1_1 or OPENSSL_API_1_0
+CXXFLAGS += -DOPENSSL_API_1_1=1
+CFLAGS += -DOPENSSL_API_1_1=1
+
+ifeq (,$(wildcard sideplugin/rockside/3rdparty/rapidyaml))
+ $(warning NotFound sideplugin/rockside/3rdparty/rapidyaml)
+ $(warning sideplugin/rockside is a submodule, auto init...)
+ IsCloneOK := $(shell \
+ set -x -e; \
+ git submodule update --init --recursive >&2; \
+ echo $$?\
+ )
+ ifneq ("${IsCloneOK}","0")
+ $(error "IsCloneOK=${IsCloneOK} Error cloning rockside, stop!")
+ endif
+endif
+EXTRA_LIB_SOURCES += sideplugin/rockside/src/topling/rapidyaml_all.cc
+CXXFLAGS += -Isideplugin/rockside/3rdparty/rapidyaml \
+ -Isideplugin/rockside/3rdparty/rapidyaml/src \
+ -Isideplugin/rockside/3rdparty/rapidyaml/ext/c4core/src \
+ -DSIDE_PLUGIN_WITH_YAML=1
+
+# topling-core is topling private
+ifneq (,$(wildcard sideplugin/topling-core))
+ TOPLING_CORE_DIR := sideplugin/topling-core
+else
+ # topling-zip is topling public
+ ifeq (,$(wildcard sideplugin/topling-zip))
+ $(warning sideplugin/topling-zip is not present, clone it from github...)
+ IsCloneOK := $(shell \
+ set -x -e; \
+ cd sideplugin; \
+ git clone https://github.com/topling/topling-zip.git >&2; \
+ cd topling-zip; \
+ git submodule update --init --recursive >&2; \
+ echo $$?\
+ )
+ ifneq ("${IsCloneOK}","0")
+ $(error "IsCloneOK=${IsCloneOK} Error cloning topling-zip, stop!")
+ endif
+ endif
+ TOPLING_CORE_DIR := sideplugin/topling-zip
+endif
+
+COMPILER := $(shell set -e; tmpfile=`mktemp -u compiler-XXXXXX`; \
+ ${CXX} ${TOPLING_CORE_DIR}/tools/configure/compiler.cpp -o $${tmpfile}.exe; \
+ ./$${tmpfile}.exe && rm -f $${tmpfile}*)
+UNAME_MachineSystem := $(shell uname -m -s | sed 's:[ /]:-:g')
+WITH_BMI2 := $(shell bash ${TOPLING_CORE_DIR}/cpu_has_bmi2.sh)
+BUILD_NAME := ${UNAME_MachineSystem}-${COMPILER}-bmi2-${WITH_BMI2}
+BUILD_ROOT := build/${BUILD_NAME}
+ifeq (${DEBUG_LEVEL}, 0)
+ BUILD_TYPE_SIG := r
+ OBJ_DIR := ${BUILD_ROOT}/rls
+endif
+ifeq (${DEBUG_LEVEL}, 1)
+ BUILD_TYPE_SIG := a
+ OBJ_DIR := ${BUILD_ROOT}/afr
+endif
+ifeq (${DEBUG_LEVEL}, 2)
+ BUILD_TYPE_SIG := d
+ OBJ_DIR := ${BUILD_ROOT}/dbg
+endif
+ifneq ($(filter auto_all_tests check check_0 watch-log gen_parallel_tests %_test %_test2, $(MAKECMDGOALS)),)
+ CXXFLAGS += -DROCKSDB_UNIT_TEST
+ MAKE_UNIT_TEST := 1
+ OBJ_DIR := $(subst build/,build-ut/,${OBJ_DIR})
+endif
+
+# 1. we define ROCKSDB_DISABLE_ZSTD=1 on build_detect_platform.
+# 2. zstd lib is included in libterark-zbs
+# 3. we alway use ZSTD
+CXXFLAGS += -DZSTD \
+ -I${TOPLING_CORE_DIR}/3rdparty/zstd/zstd \
+ -I${TOPLING_CORE_DIR}/3rdparty/zstd/zstd/dictBuilder
+
+CXXFLAGS += \
+ -I${TOPLING_CORE_DIR}/src \
+ -I${TOPLING_CORE_DIR}/boost-include \
+ -I${TOPLING_CORE_DIR}/3rdparty/zstd
+
+LDFLAGS += -L${TOPLING_CORE_DIR}/${BUILD_ROOT}/lib_shared \
+ -lterark-{zbs,fsa,core}-${COMPILER}-${BUILD_TYPE_SIG}
+
+# default is 1, can be override
+AUTO_CLONE_TOPLING_ROCKS ?= 1
+ifeq (${AUTO_CLONE_TOPLING_ROCKS},1)
+ifeq (,$(wildcard sideplugin/topling-rocks))
+ # topling specific: just for people who has permission to topling-rocks
+ dummy := $(shell set -e -x; \
+ cd sideplugin; \
+ git clone git@github.com:rockeet/topling-rocks; \
+ cd topling-rocks; \
+ git submodule update --init --recursive \
+ )
+endif
+ifeq (,$(wildcard sideplugin/cspp-memtable))
+ # topling specific: just for people who has permission to cspp-memtable
+ dummy := $(shell set -e -x; \
+ cd sideplugin; \
+ git clone git@github.com:topling/cspp-memtable; \
+ cd cspp-memtable; \
+ )
+endif
+ifeq (,$(wildcard sideplugin/cspp-wbwi))
+ dummy := $(shell set -e -x; \
+ cd sideplugin; \
+ git clone git@github.com:topling/cspp-wbwi; \
+ cd cspp-wbwi; \
+ )
+endif
+endif
+
+ifneq (,$(wildcard sideplugin/cspp-memtable))
+ # now we have cspp-memtable
+ CXXFLAGS += -DHAS_TOPLING_CSPP_MEMTABLE
+ CSPP_MEMTABLE_GIT_VER_SRC = ${BUILD_ROOT}/git-version-cspp_memtable.cc
+ EXTRA_LIB_SOURCES += sideplugin/cspp-memtable/cspp_memtable.cc \
+ sideplugin/cspp-memtable/${CSPP_MEMTABLE_GIT_VER_SRC}
+else
+ $(warning NotFound sideplugin/cspp-memtable, this is ok, only Topling CSPP MemTab is disabled)
+endif
+
+ifneq (,$(wildcard sideplugin/cspp-wbwi))
+ # now we have cspp-wbwi
+ CXXFLAGS += -DHAS_TOPLING_CSPP_WBWI
+ CSPP_WBWI_GIT_VER_SRC = ${BUILD_ROOT}/git-version-cspp_wbwi.cc
+ EXTRA_LIB_SOURCES += sideplugin/cspp-wbwi/cspp_wbwi.cc \
+ sideplugin/cspp-wbwi/${CSPP_WBWI_GIT_VER_SRC}
+else
+ $(warning NotFound sideplugin/cspp-wbwi, this is ok, only Topling CSPP WBWI(WriteBatchWithIndex) is disabled)
+endif
+
+ifneq (,$(wildcard sideplugin/topling-rocks))
+ CXXFLAGS += -I sideplugin/topling-rocks/src
+ LDFLAGS += -lstdc++fs -lcurl
+ export LD_LIBRARY_PATH:=${TOPLING_CORE_DIR}/${BUILD_ROOT}/lib_shared:${LD_LIBRARY_PATH}
+ TOPLING_ROCKS_GIT_VER_SRC = ${BUILD_ROOT}/git-version-topling_rocks.cc
+ EXTRA_LIB_SOURCES += \
+ $(wildcard sideplugin/topling-rocks/src/dcompact/*.cc) \
+ $(wildcard sideplugin/topling-rocks/src/table/*.cc) \
+ sideplugin/topling-rocks/src/misc/show_sys_info.cc \
+ sideplugin/topling-rocks/${TOPLING_ROCKS_GIT_VER_SRC}
+else
+ $(warning NotFound sideplugin/topling-rocks, this is ok, only Topling SST and Distributed Compaction are disabled)
+ ifeq (1,2) # Now link libterark-{zbs,fsa,core} instead
+ EXTRA_LIB_SOURCES += \
+ ${TOPLING_CORE_DIR}/src/terark/fstring.cpp \
+ ${TOPLING_CORE_DIR}/src/terark/hash_common.cpp \
+ ${TOPLING_CORE_DIR}/src/terark/util/throw.cpp
+ endif
+endif
+
+TOPLING_DCOMPACT_USE_ETCD := 0
+ifneq (,$(wildcard sideplugin/topling-rocks/3rdparty/etcd-cpp-apiv3/build/src/libetcd-cpp-api.${PLATFORM_SHARED_EXT}))
+ifneq (,$(wildcard sideplugin/topling-rocks/3rdparty/etcd-cpp-apiv3/build/proto/gen/proto))
+ CXXFLAGS += -I sideplugin/topling-rocks/3rdparty/etcd-cpp-apiv3/build/proto/gen/proto \
+ -I sideplugin/topling-rocks/3rdparty/etcd-cpp-apiv3
+ LDFLAGS += -L sideplugin/topling-rocks/3rdparty/etcd-cpp-apiv3/build/src -letcd-cpp-api
+ export LD_LIBRARY_PATH:=${TOPLING_ROCKS_DIR}/3rdparty/etcd-cpp-apiv3/build/src:${LD_LIBRARY_PATH}
+ ifneq (,$(wildcard ../vcpkg/packages/grpc_x64-linux/include))
+ CXXFLAGS += -I ../vcpkg/packages/grpc_x64-linux/include
+ else
+ $(error NotFound ../vcpkg/packages/grpc_x64-linux/include)
+ endif
+ ifneq (,$(wildcard ../vcpkg/packages/protobuf_x64-linux/include))
+ CXXFLAGS += -I ../vcpkg/packages/protobuf_x64-linux/include
+ else
+ $(error NotFound ../vcpkg/packages/protobuf_x64-linux/include)
+ endif
+ ifneq (,$(wildcard ../vcpkg/packages/cpprestsdk_x64-linux/include))
+ CXXFLAGS += -I ../vcpkg/packages/cpprestsdk_x64-linux/include
+ else
+ $(error NotFound ../vcpkg/packages/cpprestsdk_x64-linux/include)
+ endif
+ CXXFLAGS += -DTOPLING_DCOMPACT_USE_ETCD
+ TOPLING_DCOMPACT_USE_ETCD := 1
+endif
+endif
+
+ifeq (${TOPLING_DCOMPACT_USE_ETCD},0)
+ $(warning NotFound etcd-cpp-apiv3, this is ok, only etcd is disabled)
+endif
+
+#export ROCKSDB_KICK_OUT_OPTIONS_FILE=1
+
+# prepend EXTRA_LIB_SOURCES to LIB_SOURCES because
+# EXTRA_LIB_SOURCES single file compiling is slow
+LIB_SOURCES := ${EXTRA_LIB_SOURCES} ${LIB_SOURCES}
+
AM_DEFAULT_VERBOSITY ?= 0
AM_V_GEN = $(am__v_GEN_$(V))
@@ -219,7 +429,7 @@ am__v_AR_0 = @echo " AR " $@;
am__v_AR_1 =
AM_LINK = $(AM_V_CCLD)$(CXX) -L. $(patsubst lib%.a, -l%, $(patsubst lib%.$(PLATFORM_SHARED_EXT), -l%, $^)) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
-AM_SHARE = $(AM_V_CCLD) $(CXX) $(PLATFORM_SHARED_LDFLAGS)$@ -L. $(patsubst lib%.$(PLATFORM_SHARED_EXT), -l%, $^) $(EXEC_LDFLAGS) $(LDFLAGS) -o $@
+AM_SHARE = $(AM_V_CCLD) $(CXX) $(PLATFORM_SHARED_LDFLAGS)$@ -L. $(patsubst lib%.$(PLATFORM_SHARED_EXT), -l%, $^) $(EXTRA_SHARED_LIB_LIB) $(EXEC_LDFLAGS) $(LDFLAGS) -o $@
# Detect what platform we're building on.
# Export some common variables that might have been passed as Make variables
@@ -232,6 +442,9 @@ dummy := $(shell (export ROCKSDB_ROOT="$(CURDIR)"; \
export COMPILE_WITH_UBSAN="$(COMPILE_WITH_UBSAN)"; \
export PORTABLE="$(PORTABLE)"; \
export ROCKSDB_NO_FBCODE="$(ROCKSDB_NO_FBCODE)"; \
+ export ROCKSDB_USE_IO_URING="$(ROCKSDB_USE_IO_URING)"; \
+ export ROCKSDB_DISABLE_TCMALLOC="$(ROCKSDB_DISABLE_TCMALLOC)"; \
+ export ROCKSDB_DISABLE_ZSTD=1; \
export USE_CLANG="$(USE_CLANG)"; \
export LIB_MODE="$(LIB_MODE)"; \
export ROCKSDB_CXX_STANDARD="$(ROCKSDB_CXX_STANDARD)"; \
@@ -294,7 +507,7 @@ $(info $(shell $(CXX) --version))
endif
missing_make_config_paths := $(shell \
- grep "\./\S*\|/\S*" -o $(CURDIR)/make_config.mk | \
+ egrep "\.+/\S*|([a-z_]*)/\S*" -o $(CURDIR)/make_config.mk | \
while read path; \
do [ -e $$path ] || echo $$path; \
done | sort | uniq | grep -v "/DOES/NOT/EXIST")
@@ -305,8 +518,10 @@ $(foreach path, $(missing_make_config_paths), \
ifeq ($(PLATFORM), OS_AIX)
# no debug info
else ifneq ($(PLATFORM), IOS)
-CFLAGS += -g
-CXXFLAGS += -g
+# default disable dwarf
+DBG_DWARF ?=
+CFLAGS += ${DBG_DWARF} -g3
+CXXFLAGS += ${DBG_DWARF} -g3
else
# no debug info for IOS, that will make our library big
OPT += -DNDEBUG
@@ -502,6 +717,8 @@ ifndef DISABLE_WARNING_AS_ERROR
WARNING_FLAGS += -Werror
endif
+# topling specific WARNING_FLAGS
+WARNING_FLAGS := -Wall -Wno-shadow
ifdef LUA_PATH
@@ -534,6 +751,7 @@ ifeq ($(NO_THREEWAY_CRC32C), 1)
endif
CFLAGS += $(C_WARNING_FLAGS) $(WARNING_FLAGS) -I. -I./include $(PLATFORM_CCFLAGS) $(OPT)
+CXXFLAGS += -Isideplugin/rockside/src
CXXFLAGS += $(WARNING_FLAGS) -I. -I./include $(PLATFORM_CXXFLAGS) $(OPT) -Woverloaded-virtual -Wnon-virtual-dtor -Wno-missing-field-initializers
# Allow offsetof to work on non-standard layout types. Some compiler could
@@ -543,10 +761,11 @@ CXXFLAGS += -Wno-invalid-offsetof
LDFLAGS += $(PLATFORM_LDFLAGS)
-LIB_OBJECTS = $(patsubst %.cc, $(OBJ_DIR)/%.o, $(LIB_SOURCES))
+LIB_OBJECTS := $(patsubst %.cc, $(OBJ_DIR)/%.o, $(LIB_SOURCES))
+LIB_OBJECTS := $(patsubst %.cpp,$(OBJ_DIR)/%.o, $(LIB_OBJECTS))
LIB_OBJECTS += $(patsubst %.cc, $(OBJ_DIR)/%.o, $(ROCKSDB_PLUGIN_SOURCES))
-ifeq ($(HAVE_POWER8),1)
LIB_OBJECTS += $(patsubst %.c, $(OBJ_DIR)/%.o, $(LIB_SOURCES_C))
+ifeq ($(HAVE_POWER8),1)
LIB_OBJECTS += $(patsubst %.S, $(OBJ_DIR)/%.o, $(LIB_SOURCES_ASM))
endif
@@ -557,6 +776,9 @@ endif
# range_tree is not compatible with non GNU libc on ppc64
# see https://jira.percona.com/browse/PS-7559
ifneq ($(PPC_LIBC_IS_GNU),0)
+ # topling: should move this line above and delete LIB_OBJECTS += .., add here for min-diff principle
+ # add to LIB_SOURCES to generate *.cc.d dependency rules
+ LIB_SOURCES += ${RANGE_TREE_SOURCES}
LIB_OBJECTS += $(patsubst %.cc, $(OBJ_DIR)/%.o, $(RANGE_TREE_SOURCES))
endif
@@ -584,6 +806,13 @@ ALL_SOURCES += $(ROCKSDB_PLUGIN_SOURCES)
TESTS = $(patsubst %.cc, %, $(notdir $(TEST_MAIN_SOURCES)))
TESTS += $(patsubst %.c, %, $(notdir $(TEST_MAIN_SOURCES_C)))
+ifeq (${MAKE_UNIT_TEST},1)
+ ifeq (cspp,$(patsubst cspp:%,cspp,${DefaultWBWIFactory}))
+ # cspp WBWI does not support txn with ts(timestamp)
+ $(warning "test with CSPP_WBWI, skip write_committed_transaction_ts_test")
+ TESTS := $(filter-out write_committed_transaction_ts_test,${TESTS})
+ endif
+endif
# `make check-headers` to very that each header file includes its own
# dependencies
@@ -706,6 +935,7 @@ STATIC_LIBRARY = ${LIBNAME}$(LIBDEBUG).a
STATIC_TEST_LIBRARY = ${LIBNAME}_test$(LIBDEBUG).a
STATIC_TOOLS_LIBRARY = ${LIBNAME}_tools$(LIBDEBUG).a
STATIC_STRESS_LIBRARY = ${LIBNAME}_stress$(LIBDEBUG).a
+#$(error LIBDEBUG = ${LIBDEBUG} PLATFORM_SHARED_VERSIONED=${PLATFORM_SHARED_VERSIONED})
ALL_STATIC_LIBS = $(STATIC_LIBRARY) $(STATIC_TEST_LIBRARY) $(STATIC_TOOLS_LIBRARY) $(STATIC_STRESS_LIBRARY)
@@ -770,8 +1000,8 @@ default: all
#-----------------------------------------------
ifneq ($(PLATFORM_SHARED_EXT),)
-ifneq ($(PLATFORM_SHARED_VERSIONED),true)
SHARED1 = ${LIBNAME}$(LIBDEBUG).$(PLATFORM_SHARED_EXT)
+ifneq ($(PLATFORM_SHARED_VERSIONED),true)
SHARED2 = $(SHARED1)
SHARED3 = $(SHARED1)
SHARED4 = $(SHARED1)
@@ -780,7 +1010,6 @@ else
SHARED_MAJOR = $(ROCKSDB_MAJOR)
SHARED_MINOR = $(ROCKSDB_MINOR)
SHARED_PATCH = $(ROCKSDB_PATCH)
-SHARED1 = ${LIBNAME}.$(PLATFORM_SHARED_EXT)
ifeq ($(PLATFORM), OS_MACOSX)
SHARED_OSX = $(LIBNAME)$(LIBDEBUG).$(SHARED_MAJOR)
SHARED2 = $(SHARED_OSX).$(PLATFORM_SHARED_EXT)
@@ -801,7 +1030,7 @@ $(SHARED3): $(SHARED4)
endif # PLATFORM_SHARED_VERSIONED
$(SHARED4): $(LIB_OBJECTS)
- $(AM_V_CCLD) $(CXX) $(PLATFORM_SHARED_LDFLAGS)$(SHARED3) $(LIB_OBJECTS) $(LDFLAGS) -o $@
+ $(AM_V_CCLD) $(CXX) $(PLATFORM_SHARED_LDFLAGS)$(SHARED3) $(LIB_OBJECTS) $(EXTRA_SHARED_LIB_LIB) $(LDFLAGS) -o $@
endif # PLATFORM_SHARED_EXT
.PHONY: check clean coverage ldb_tests package dbg gen-pc build_size \
@@ -1257,6 +1486,14 @@ librocksdb_env_basic_test.a: $(OBJ_DIR)/env/env_basic_test.o $(LIB_OBJECTS) $(TE
db_bench: $(OBJ_DIR)/tools/db_bench.o $(BENCH_OBJECTS) $(TESTUTIL) $(LIBRARY)
$(AM_LINK)
+ifeq (${DEBUG_LEVEL},2)
+db_bench_dbg: $(OBJ_DIR)/tools/db_bench.o $(BENCH_OBJECTS) $(TESTUTIL) $(LIBRARY)
+ $(AM_LINK)
+endif
+ifeq (${DEBUG_LEVEL},0)
+db_bench_rls: $(OBJ_DIR)/tools/db_bench.o $(BENCH_OBJECTS) $(TESTUTIL) $(LIBRARY)
+ $(AM_LINK)
+endif
trace_analyzer: $(OBJ_DIR)/tools/trace_analyzer.o $(ANALYZE_OBJECTS) $(TOOLS_LIBRARY) $(LIBRARY)
$(AM_LINK)
@@ -1898,6 +2135,57 @@ io_tracer_parser_test: $(OBJ_DIR)/tools/io_tracer_parser_test.o $(OBJ_DIR)/tools
$(AM_LINK)
io_tracer_parser: $(OBJ_DIR)/tools/io_tracer_parser.o $(TOOLS_LIBRARY) $(LIBRARY)
+#--------------------------------------------------
+ifndef ROCKSDB_USE_LIBRADOS
+ AUTO_ALL_EXCLUDE_SRC += utilities/env_librados_test.cc
+ AUTO_ALL_EXCLUDE_SRC += utilities/env_mirror_test.cc
+endif
+
+AUTO_ALL_TESTS_SRC := $(shell find * -name '*_test.cc' -not -path 'java/*' -not -path '*/3rdparty/*') ${EXTRA_TESTS_SRC}
+AUTO_ALL_TESTS_SRC := $(filter-out ${AUTO_ALL_EXCLUDE_SRC},${AUTO_ALL_TESTS_SRC})
+AUTO_ALL_TESTS_OBJ := $(addprefix $(OBJ_DIR)/,$(AUTO_ALL_TESTS_SRC:%.cc=%.o))
+AUTO_ALL_TESTS_EXE := $(AUTO_ALL_TESTS_OBJ:%.o=%)
+
+define LN_TEST_TARGET
+t${DEBUG_LEVEL}/${1}: ${2}
+ mkdir -p $(dir $$@) && ln -sf `realpath ${2}` $$@
+
+endef
+#intentional one blank line above
+
+.PHONY: auto_all_tests
+auto_all_tests: ${AUTO_ALL_TESTS_EXE}
+
+$(OBJ_DIR)/tools/%_test: $(OBJ_DIR)/tools/%_test.o \
+ ${TOOLS_LIBRARY} $(TEST_LIBRARY) $(LIBRARY)
+ $(AM_LINK)
+
+$(OBJ_DIR)/%_test: $(OBJ_DIR)/%_test.o $(TEST_LIBRARY) $(LIBRARY)
+ $(AM_LINK)
+
+$(eval $(foreach test,${AUTO_ALL_TESTS_EXE},$(call LN_TEST_TARGET,$(notdir ${test}),${test})))
+
+$(OBJ_DIR)/tools/db_bench_tool_test : \
+$(OBJ_DIR)/tools/db_bench_tool_test.o \
+ ${BENCH_OBJECTS} $(TEST_LIBRARY) $(LIBRARY)
+ $(AM_LINK)
+
+$(OBJ_DIR)/file/prefetch_test : \
+$(OBJ_DIR)/file/prefetch_test.o \
+$(OBJ_DIR)/tools/io_tracer_parser_tool.o $(TEST_LIBRARY) $(LIBRARY)
+ $(AM_LINK)
+
+$(OBJ_DIR)/tools/trace_analyzer_test : \
+$(OBJ_DIR)/tools/trace_analyzer_test.o \
+ ${ANALYZE_OBJECTS} ${TOOLS_LIBRARY} $(TEST_LIBRARY) $(LIBRARY)
+ $(AM_LINK)
+
+$(OBJ_DIR)/tools/block_cache_analyzer/block_cache_trace_analyzer_test : \
+$(OBJ_DIR)/tools/block_cache_analyzer/block_cache_trace_analyzer_test.o \
+$(OBJ_DIR)/tools/block_cache_analyzer/block_cache_trace_analyzer.o $(TEST_LIBRARY) $(LIBRARY)
+ $(AM_LINK)
+
+$(OBJ_DIR)/%: $(OBJ_DIR)/%.o $(TEST_LIBRARY) $(LIBRARY)
$(AM_LINK)
db_blob_corruption_test: $(OBJ_DIR)/db/blob/db_blob_corruption_test.o $(TEST_LIBRARY) $(LIBRARY)
@@ -2452,7 +2740,7 @@ $(OBJ_DIR)/%.o: %.cpp
$(AM_V_CC)mkdir -p $(@D) && $(CXX) $(CXXFLAGS) -c $< -o $@ $(COVERAGEFLAGS)
$(OBJ_DIR)/%.o: %.c
- $(AM_V_CC)$(CC) $(CFLAGS) -c $< -o $@
+ $(AM_V_CC)mkdir -p $(@D) && $(CC) $(CFLAGS) -c $< -o $@
endif
# ---------------------------------------------------------------------------
@@ -2460,8 +2748,9 @@ endif
# ---------------------------------------------------------------------------
# If skip dependencies is ON, skip including the dep files
ifneq ($(SKIP_DEPENDS), 1)
-DEPFILES = $(patsubst %.cc, $(OBJ_DIR)/%.cc.d, $(ALL_SOURCES))
-DEPFILES+ = $(patsubst %.c, $(OBJ_DIR)/%.c.d, $(LIB_SOURCES_C) $(TEST_MAIN_SOURCES_C))
+DEPFILES := $(patsubst %.cc, $(OBJ_DIR)/%.cc.d, $(ALL_SOURCES))
+DEPFILES := $(patsubst %.cpp,$(OBJ_DIR)/%.cpp.d,$(DEPFILES))
+DEPFILES += $(patsubst %.c, $(OBJ_DIR)/%.c.d, $(LIB_SOURCES_C) $(TEST_MAIN_SOURCES_C))
ifeq ($(USE_FOLLY),1)
DEPFILES +=$(patsubst %.cpp, $(OBJ_DIR)/%.cpp.d, $(FOLLY_SOURCES))
endif
@@ -2475,12 +2764,12 @@ endif
$(OBJ_DIR)/%.cc.d: %.cc
@mkdir -p $(@D) && $(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) \
-MM -MT'$@' -MT'$(<:.cc=.o)' -MT'$(<:%.cc=$(OBJ_DIR)/%.o)' \
- "$<" -o '$@'
+ "$<" -o '$@'
$(OBJ_DIR)/%.cpp.d: %.cpp
@mkdir -p $(@D) && $(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) \
-MM -MT'$@' -MT'$(<:.cpp=.o)' -MT'$(<:%.cpp=$(OBJ_DIR)/%.o)' \
- "$<" -o '$@'
+ "$<" -o '$@'
ifeq ($(HAVE_POWER8),1)
DEPFILES_C = $(patsubst %.c, $(OBJ_DIR)/%.c.d, $(LIB_SOURCES_C))
@@ -2508,6 +2797,40 @@ build_subset_tests: $(ROCKSDBTESTS_SUBSET)
list_all_tests:
echo "$(ROCKSDBTESTS_SUBSET)"
+TOPLING_ZBS_TARGET := ${BUILD_ROOT}/lib_shared/libterark-zbs-${COMPILER}-${BUILD_TYPE_SIG}.${PLATFORM_SHARED_EXT}
+${SHARED4}: ${TOPLING_CORE_DIR}/${TOPLING_ZBS_TARGET}
+${TOPLING_CORE_DIR}/${TOPLING_ZBS_TARGET}: CXXFLAGS =
+${TOPLING_CORE_DIR}/${TOPLING_ZBS_TARGET}: LDFLAGS =
+${TOPLING_CORE_DIR}/${TOPLING_ZBS_TARGET}:
+ +make -C ${TOPLING_CORE_DIR} ${TOPLING_ZBS_TARGET}
+
+ifneq (,$(wildcard sideplugin/topling-rocks))
+sideplugin/topling-rocks/${TOPLING_ROCKS_GIT_VER_SRC}: \
+ $(shell find sideplugin/topling-rocks/{src,tools} -name '*.cc' -o -name '*.h')
+ +make -C sideplugin/topling-rocks ${TOPLING_ROCKS_GIT_VER_SRC}
+
+.PHONY: dcompact_worker
+dcompact_worker: ${SHARED1}
+ifeq (${MAKE_UNIT_TEST},1)
+ @echo rocksdb unit test, skip dcompact_worker
+else
+ +make -C sideplugin/topling-rocks/tools/dcompact ${OBJ_DIR}/dcompact_worker.exe CHECK_TERARK_FSA_LIB_UPDATE=0
+endif
+endif
+
+ifneq (,$(wildcard sideplugin/cspp-memtable))
+sideplugin/cspp-memtable/${CSPP_MEMTABLE_GIT_VER_SRC}: \
+ sideplugin/cspp-memtable/cspp_memtable.cc \
+ sideplugin/cspp-memtable/Makefile
+ +make -C sideplugin/cspp-memtable ${CSPP_MEMTABLE_GIT_VER_SRC}
+endif
+ifneq (,$(wildcard sideplugin/cspp-wbwi))
+sideplugin/cspp-wbwi/${CSPP_WBWI_GIT_VER_SRC}: \
+ sideplugin/cspp-wbwi/cspp_wbwi.cc \
+ sideplugin/cspp-wbwi/Makefile
+ +make -C sideplugin/cspp-wbwi ${CSPP_WBWI_GIT_VER_SRC}
+endif
+
# Remove the rules for which dependencies should not be generated and see if any are left.
#If so, include the dependencies; if not, do not include the dependency files
ROCKS_DEP_RULES=$(filter-out clean format check-format check-buck-targets check-headers check-sources jclean jtest package analyze tags rocksdbjavastatic% unity.% unity_test checkout_folly, $(MAKECMDGOALS))
diff --git a/README.md b/README.md
index 22ad6d838..b9d5600fe 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,67 @@
+## ToplingDB: A Persistent Key-Value Store for External Storage
+ToplingDB is developed and maintained by [Topling Inc](https://topling.cn). It is built with [RocksDB](https://github.com/facebook/rocksdb).
+
+ToplingDB has many more key features than RocksDB:
+1. [SidePlugin](https://github.com/topling/rockside/wiki) enables users to write a json(or yaml) to define DB configs
+1. [Embedded Http Server](https://github.com/topling/rockside/wiki/WebView) enables users to view almost all DB info on web, this is a component of [SidePlugin](https://github.com/topling/rockside/wiki)
+1. [Embedded Http Server](https://github.com/topling/rockside/wiki/WebView) enables users to [online change](https://github.com/topling/rockside/wiki/Online-Change-Options) db/cf options and all db meta objects(such as MemTabFactory, TableFactory, WriteBufferManager ...) without restarting the running process
+1. Many improvements and refactorings on RocksDB, aimed at performance and extensibility
+1. [Topling**CSPP**MemTab](https://github.com/topling/rockside/wiki/ToplingCSPPMemTab)(**CSPP** is **C**rash **S**afe **P**arallel **P**atricia trie) MemTab, which outperforms SkipList on all aspects: 3x lower memory usage, 7x single thread performance, perfect multi-thread scaling
+1. [Topling**Fast**Table](https://github.com/topling/rockside/wiki/ToplingFastTable) is an SST implementation optimized for speed, aimed for MemTable flush and L0->L1 compaction.
+1. [Topling**Zip**Table](https://github.com/topling/rockside/wiki/ToplingZipTable) is an SST implementation optimized for RAM and SSD space, aimed for L2+ level compaction, which used dedicated searchable in-memory data compression algorithms.
+1. [Distributed Compaction](https://github.com/topling/rockside/wiki/Distributed-Compaction) for offload compactions on elastic computing clusters, this is more general than RocksDB Compaction Service.
+1. Builtin SidePlugin**s** for existing RocksDB components(Cache, Comparator, TableFactory, MemTableFactory...)
+1. Builtin Prometheus metrics support, this is based on [Embedded Http Server](https://github.com/topling/rockside/wiki/WebView)
+1. Many bugfixes for RocksDB, a small part of such fixes was [Pull Requested](https://github.com/facebook/rocksdb/pulls?q=is%3Apr+author%3Arockeet) to [upstream RocksDB](https://github.com/facebook/rocksdb)
+
+## ToplingDB cloud native services
+1. [Todis](https://github.com/topling/todis)(Redis on ToplingDB), [Todis on aliyun](https://topling.cn/products)
+2. ToplingSQL(MySQL on ToplingDB), coming soon...
+
+## ToplingDB Components
+With SidePlugin mechanics, plugins/components can be physically separated from core toplingdb
+1. Compiled to a separate dynamic lib and loaded at runtime
+2. User code need not any changes, just change json/yaml files
+3. Topling's non-open-source enterprise plugins/components are delivered in this way
+
+ Repository | Permission | Description (and components)
+-------------- | ---------- | -----------
+[ToplingDB](https://github.com/topling/toplingdb) | public | Top repository, forked from [RocksDB](https://github.com/facebook/rocksdb) with our fixes, refactorings and enhancements
+[rockside](https://github.com/topling/rockside) | public | This is a submodule, contains:
- SidePlugin framework
- Embedded Http Server
- Prometheus metrics
- Builtin SidePlugin**s**
+[cspp-memtable](https://github.com/topling/cspp-memtable) | **private** | Auto clone in Makefile, [open for partner](https://github.com/topling/rockside/wiki/Topling-Partner). Usage [doc](https://github.com/topling/rockside/wiki/ToplingCSPPMemTab)
+[topling-rocks](https://github.com/topling/topling-rocks) | **private** | Auto clone in Makefile, contains:- [Topling**Fast**Table](https://github.com/topling/rockside/wiki/ToplingFastTable)
- [Topling**Zip**Table](https://github.com/topling/rockside/wiki/ToplingZipTable)
- [Distributed Compaction](https://github.com/topling/rockside/wiki/Distributed-Compaction)
+
+**private** repo**s** are auto cloned in ToplingDB's Makefile, community users have no access permission to these **private** repo**s**, so the auto clone in Makefile will fail, thus ToplingDB is built without **private** components, this is the so-called **community** version.
+
+## Run db_bench
+ToplingDB requires gcc 8.4 or newer, or a recent clang (released within the last 3 years).
+
+Even without Topling performance components, ToplingDB is much faster than upstream RocksDB:
+```bash
+sudo yum -y install git libaio-devel gcc-c++ gflags-devel zlib-devel bzip2-devel
+git clone https://github.com/topling/toplingdb
+cd toplingdb
+make -j`nproc` db_bench DEBUG_LEVEL=0
+cp sideplugin/rockside/src/topling/web/{style.css,index.html} /path/to/dbdir
+cp sideplugin/rockside/sample-conf/lcompact_community.yaml .
+export LD_LIBRARY_PATH=`find sideplugin -name lib_shared`
+# change ./lcompact_community.yaml
+# 1. path items (search /dev/shm), if you have no fast disk(such as on a cloud server), use /dev/shm
+# 2. change max_background_compactions to your cpu core num
+# command option -json can accept json and yaml files, here use yaml file for more human readable
+./db_bench -json lcompact_community.yaml -num 10000000 -disable_wal=true -value_size 2000 -benchmarks=fillrandom,readrandom -batch_size=10
+# you can access http://127.0.0.1:8081 to see webview
+# you can see this db_bench is much faster than RocksDB
+```
+## License
+We disallow bytedance using this software, other terms are identical with
+upstream rocksdb license, see [LICENSE.Apache](LICENSE.Apache), [COPYING](COPYING) and
+[LICENSE.leveldb](LICENSE.leveldb).
+
+
+
+
+
## RocksDB: A Persistent Key-Value Store for Flash and RAM Storage
[![CircleCI Status](https://circleci.com/gh/facebook/rocksdb.svg?style=svg)](https://circleci.com/gh/facebook/rocksdb)
diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform
index c7f9261d8..22bab2a85 100755
--- a/build_tools/build_detect_platform
+++ b/build_tools/build_detect_platform
@@ -49,7 +49,7 @@ fi
if [ "$ROCKSDB_CXX_STANDARD" ]; then
PLATFORM_CXXFLAGS="-std=$ROCKSDB_CXX_STANDARD"
else
- PLATFORM_CXXFLAGS="-std=c++17"
+ PLATFORM_CXXFLAGS="-std=gnu++17"
fi
# we currently depend on POSIX platform
@@ -244,7 +244,7 @@ EOF
Cygwin)
PLATFORM=CYGWIN
PLATFORM_SHARED_CFLAGS=""
- PLATFORM_CXXFLAGS="-std=gnu++11"
+ PLATFORM_CXXFLAGS="-std=gnu++17"
COMMON_FLAGS="$COMMON_FLAGS -DCYGWIN"
if [ -z "$USE_CLANG" ]; then
COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp"
@@ -339,6 +339,9 @@ EOF
then
COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=1 -DGFLAGS_NAMESPACE=google"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags"
+ else
+ echo Not found: GFLAGS 1>&2
+ exit 1
fi
fi
@@ -352,6 +355,9 @@ EOF
COMMON_FLAGS="$COMMON_FLAGS -DZLIB"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lz"
JAVA_LDFLAGS="$JAVA_LDFLAGS -lz"
+ else
+ echo Not found: zlib "(for gzip)" 1>&2
+ exit 1
fi
fi
diff --git a/db/column_family.cc b/db/column_family.cc
index 2c93eed2c..e5ab12f45 100644
--- a/db/column_family.cc
+++ b/db/column_family.cc
@@ -81,10 +81,10 @@ ColumnFamilyHandleImpl::~ColumnFamilyHandleImpl() {
}
}
-uint32_t ColumnFamilyHandleImpl::GetID() const { return cfd()->GetID(); }
+uint32_t ColumnFamilyHandleImpl::GetID() const { return cfd_->GetID(); }
const std::string& ColumnFamilyHandleImpl::GetName() const {
- return cfd()->GetName();
+ return cfd_->GetName();
}
Status ColumnFamilyHandleImpl::GetDescriptor(ColumnFamilyDescriptor* desc) {
@@ -103,6 +103,13 @@ const Comparator* ColumnFamilyHandleImpl::GetComparator() const {
return cfd()->user_comparator();
}
+uint32_t ColumnFamilyHandleInternal::GetID() const {
+ return internal_cfd_->GetID();
+}
+const std::string& ColumnFamilyHandleInternal::GetName() const {
+ return internal_cfd_->GetName();
+}
+
void GetIntTblPropCollectorFactory(
const ImmutableCFOptions& ioptions,
IntTblPropCollectorFactories* int_tbl_prop_collector_factories) {
@@ -539,7 +546,7 @@ ColumnFamilyData::ColumnFamilyData(
ioptions_.max_write_buffer_size_to_maintain),
super_version_(nullptr),
super_version_number_(0),
- local_sv_(new ThreadLocalPtr(&SuperVersionUnrefHandle)),
+ local_sv_(&SuperVersionUnrefHandle),
next_(nullptr),
prev_(nullptr),
log_number_(0),
@@ -730,7 +737,7 @@ bool ColumnFamilyData::UnrefAndTryDelete() {
super_version_ = nullptr;
// Release SuperVersion references kept in ThreadLocalPtr.
- local_sv_.reset();
+ local_sv_.Reset(nullptr);
if (sv->Unref()) {
// Note: sv will delete this ColumnFamilyData during Cleanup()
@@ -776,7 +783,11 @@ uint64_t ColumnFamilyData::OldestLogToKeep() {
return current_log;
}
+#if defined(ROCKSDB_UNIT_TEST)
const double kIncSlowdownRatio = 0.8;
+#else
+const double kIncSlowdownRatio = 0.97; // topling specific
+#endif
const double kDecSlowdownRatio = 1 / kIncSlowdownRatio;
const double kNearStopSlowdownRatio = 0.6;
const double kDelayRecoverSlowdownRatio = 1.4;
@@ -1105,8 +1116,16 @@ uint64_t ColumnFamilyData::GetLiveSstFilesSize() const {
MemTable* ColumnFamilyData::ConstructNewMemtable(
const MutableCFOptions& mutable_cf_options, SequenceNumber earliest_seq) {
- return new MemTable(internal_comparator_, ioptions_, mutable_cf_options,
+#if !defined(ROCKSDB_UNIT_TEST)
+ auto beg = ioptions_.clock->NowNanos();
+#endif
+ auto tab = new MemTable(internal_comparator_, ioptions_, mutable_cf_options,
write_buffer_manager_, earliest_seq, id_);
+#if !defined(ROCKSDB_UNIT_TEST)
+ auto end = ioptions_.clock->NowNanos();
+ RecordInHistogram(ioptions_.stats, MEMTAB_CONSTRUCT_NANOS, end - beg);
+#endif
+ return tab;
}
void ColumnFamilyData::CreateNewMemtable(
@@ -1245,7 +1264,7 @@ SuperVersion* ColumnFamilyData::GetThreadLocalSuperVersion(DBImpl* db) {
// have swapped in kSVObsolete. We re-check the value at when returning
// SuperVersion back to thread local, with an atomic compare and swap.
// The superversion will need to be released if detected to be stale.
- void* ptr = local_sv_->Swap(SuperVersion::kSVInUse);
+ void* ptr = local_sv_.Swap(SuperVersion::kSVInUse);
// Invariant:
// (1) Scrape (always) installs kSVObsolete in ThreadLocal storage
// (2) the Swap above (always) installs kSVInUse, ThreadLocal storage
@@ -1286,7 +1305,7 @@ bool ColumnFamilyData::ReturnThreadLocalSuperVersion(SuperVersion* sv) {
assert(sv != nullptr);
// Put the SuperVersion back
void* expected = SuperVersion::kSVInUse;
- if (local_sv_->CompareAndSwap(static_cast(sv), expected)) {
+ if (local_sv_.CompareAndSwap(static_cast(sv), expected)) {
// When we see kSVInUse in the ThreadLocal, we are sure ThreadLocal
// storage has not been altered and no Scrape has happened. The
// SuperVersion is still current.
@@ -1354,7 +1373,7 @@ void ColumnFamilyData::InstallSuperVersion(
void ColumnFamilyData::ResetThreadLocalSuperVersions() {
autovector sv_ptrs;
- local_sv_->Scrape(&sv_ptrs, SuperVersion::kSVObsolete);
+ local_sv_.Scrape(&sv_ptrs, SuperVersion::kSVObsolete);
for (auto ptr : sv_ptrs) {
assert(ptr);
if (ptr == SuperVersion::kSVInUse) {
diff --git a/db/column_family.h b/db/column_family.h
index 91a825374..807b4a952 100644
--- a/db/column_family.h
+++ b/db/column_family.h
@@ -193,6 +193,8 @@ class ColumnFamilyHandleInternal : public ColumnFamilyHandleImpl {
void SetCFD(ColumnFamilyData* _cfd) { internal_cfd_ = _cfd; }
virtual ColumnFamilyData* cfd() const override { return internal_cfd_; }
+ uint32_t GetID() const final;
+ const std::string& GetName() const final;
private:
ColumnFamilyData* internal_cfd_;
@@ -517,7 +519,7 @@ class ColumnFamilyData {
return full_history_ts_low_;
}
- ThreadLocalPtr* TEST_GetLocalSV() { return local_sv_.get(); }
+ ThreadLocalPtr* TEST_GetLocalSV() { return &local_sv_; }
WriteBufferManager* write_buffer_mgr() { return write_buffer_manager_; }
std::shared_ptr
GetFileMetadataCacheReservationManager() {
@@ -584,7 +586,7 @@ class ColumnFamilyData {
// Thread's local copy of SuperVersion pointer
// This needs to be destructed before mutex_
- std::unique_ptr local_sv_;
+ ThreadLocalPtr local_sv_;
// pointers for a circular linked list. we use it to support iterations over
// all column families that are alive (note: dropped column families can also
diff --git a/db/compaction/compaction.cc b/db/compaction/compaction.cc
index f35b2b5ca..c7734826e 100644
--- a/db/compaction/compaction.cc
+++ b/db/compaction/compaction.cc
@@ -265,7 +265,10 @@ Compaction::Compaction(
compaction_reason_ = CompactionReason::kManualCompaction;
}
if (max_subcompactions_ == 0) {
- max_subcompactions_ = _mutable_db_options.max_subcompactions;
+ if (1 == output_level_ && _mutable_db_options.max_level1_subcompactions)
+ max_subcompactions_ = _mutable_db_options.max_level1_subcompactions;
+ else
+ max_subcompactions_ = _mutable_db_options.max_subcompactions;
}
#ifndef NDEBUG
@@ -372,6 +375,10 @@ bool Compaction::InputCompressionMatchesOutput() const {
return matches;
}
+bool TableFactory::InputCompressionMatchesOutput(const Compaction* c) const {
+ return c->InputCompressionMatchesOutput();
+}
+
bool Compaction::IsTrivialMove() const {
// Avoid a move if there is lots of overlapping grandparent data.
// Otherwise, the move could create a parent file that will require
@@ -401,6 +408,17 @@ bool Compaction::IsTrivialMove() const {
return false;
}
+#if !defined(ROCKSDB_UNIT_TEST) // ToplingDB specific
+ if (kCompactionStyleLevel == immutable_options_.compaction_style) {
+ auto& cfo = mutable_cf_options_;
+ if (1 == output_level_ &&
+ immutable_options_.compaction_executor_factory &&
+ cfo.write_buffer_size > cfo.target_file_size_base * 3/2) {
+ return false;
+ }
+ }
+#endif
+
// Used in universal compaction, where trivial move can be done if the
// input files are non overlapping
if ((mutable_cf_options_.compaction_options_universal.allow_trivial_move) &&
@@ -411,7 +429,7 @@ bool Compaction::IsTrivialMove() const {
if (!(start_level_ != output_level_ && num_input_levels() == 1 &&
input(0, 0)->fd.GetPathId() == output_path_id() &&
- InputCompressionMatchesOutput())) {
+ immutable_options_.table_factory->InputCompressionMatchesOutput(this))) {
return false;
}
@@ -637,6 +655,7 @@ std::unique_ptr Compaction::CreateCompactionFilter() const {
context.is_manual_compaction = is_manual_compaction_;
context.column_family_id = cfd_->GetID();
context.reason = TableFileCreationReason::kCompaction;
+ context.smallest_seqno = GetSmallestSeqno();
return cfd_->ioptions()->compaction_filter_factory->CreateCompactionFilter(
context);
}
@@ -673,7 +692,11 @@ bool Compaction::ShouldFormSubcompactions() const {
if (cfd_->ioptions()->compaction_style == kCompactionStyleLevel) {
return (start_level_ == 0 || is_manual_compaction_) && output_level_ > 0 &&
+ #if defined(ROCKSDB_UNIT_TEST)
!IsOutputLevelEmpty();
+ #else
+ true; // ToplingDB specific
+ #endif
} else if (cfd_->ioptions()->compaction_style == kCompactionStyleUniversal) {
return number_levels_ > 1 && output_level_ > 0;
} else {
@@ -759,4 +782,14 @@ int Compaction::EvaluatePenultimateLevel(
return penultimate_level;
}
+uint64_t Compaction::GetSmallestSeqno() const {
+ uint64_t smallest_seqno = UINT64_MAX;
+ for (auto& eachlevel : inputs_) {
+ for (auto& eachfile : eachlevel.files)
+ if (smallest_seqno > eachfile->fd.smallest_seqno)
+ smallest_seqno = eachfile->fd.smallest_seqno;
+ }
+ return smallest_seqno;
+}
+
} // namespace ROCKSDB_NAMESPACE
diff --git a/db/compaction/compaction.h b/db/compaction/compaction.h
index bd204b122..555471fdd 100644
--- a/db/compaction/compaction.h
+++ b/db/compaction/compaction.h
@@ -153,7 +153,7 @@ class Compaction {
return &inputs_[compaction_input_level].files;
}
- const std::vector* inputs() { return &inputs_; }
+ const std::vector* inputs() const { return &inputs_; }
// Returns the LevelFilesBrief of the specified compaction input level.
const LevelFilesBrief* input_levels(size_t compaction_input_level) const {
@@ -290,7 +290,7 @@ class Compaction {
int output_level, VersionStorageInfo* vstorage,
const std::vector& inputs);
- TablePropertiesCollection GetOutputTableProperties() const {
+ const TablePropertiesCollection& GetOutputTableProperties() const {
return output_table_properties_;
}
@@ -356,6 +356,7 @@ class Compaction {
bool ShouldNotifyOnCompactionCompleted() const {
return notify_on_compaction_completion_;
}
+ uint64_t GetSmallestSeqno() const;
static constexpr int kInvalidLevel = -1;
// Evaluate penultimate output level. If the compaction supports
@@ -460,6 +461,7 @@ class Compaction {
// Does input compression match the output compression?
bool InputCompressionMatchesOutput() const;
+ friend class TableFactory; // use InputCompressionMatchesOutput
// table properties of output files
TablePropertiesCollection output_table_properties_;
diff --git a/db/compaction/compaction_executor.cc b/db/compaction/compaction_executor.cc
new file mode 100644
index 000000000..fcf79590f
--- /dev/null
+++ b/db/compaction/compaction_executor.cc
@@ -0,0 +1,326 @@
+//
+// Created by leipeng on 2021/1/11.
+//
+
+#include "compaction_executor.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+CompactionParams::CompactionParams() {
+ is_deserialized = false;
+}
+CompactionParams::~CompactionParams() {
+ if (is_deserialized) {
+ ROCKSDB_VERIFY(IsCompactionWorker());
+ /*
+ for (auto& x : *inputs) {
+ for (auto& e : x.atomic_compaction_unit_boundaries) {
+ delete e.smallest;
+ delete e.largest;
+ }
+ }
+ */
+ if (grandparents) {
+ for (auto meta : *grandparents) {
+ delete meta;
+ }
+ delete grandparents;
+ }
+ if (inputs) {
+ for (auto& level_files : *inputs) {
+ for (auto meta : level_files.files)
+ delete meta;
+ }
+ delete inputs;
+ }
+ delete existing_snapshots;
+ //delete compaction_job_stats;
+ }
+ else {
+ //ROCKSDB_VERIFY(!IsCompactionWorker());
+ }
+}
+
+#if defined(_MSC_VER)
+static std::string html_user_key_decode(const CompactionParams&, Slice uk) {
+ return uk.ToString(true);
+}
+#else
+std::string __attribute__((weak))
+CompactionParams_html_user_key_decode(const CompactionParams&, Slice);
+static std::string html_user_key_decode(const CompactionParams& cp, Slice uk) {
+ if (CompactionParams_html_user_key_decode)
+ return CompactionParams_html_user_key_decode(cp, uk);
+ else
+ return uk.ToString(true);
+}
+#endif
+
+static void PrintVersionSetSerDe(FILE* fp, const VersionSetSerDe& v) {
+ fprintf(fp, "VersionSetSerDe\n");
+ fprintf(fp, " last_sequence = %zd, "
+ "last_allocated_sequence = %zd, "
+ "last_published_sequence = %zd\n",
+ size_t(v.last_sequence),
+ size_t(v.last_allocated_sequence),
+ size_t(v.last_published_sequence));
+ fprintf(fp, " next_file_number = %zd, "
+ "min_log_number_to_keep_2pc = %zd, "
+ "manifest_file_number = %zd, "
+ "options_file_number = %zd, "
+ "prev_log_number = %zd, "
+ "current_version_number = %zd\n",
+ size_t(v.next_file_number),
+ #if ROCKSDB_MAJOR < 7
+ size_t(v.min_log_number_to_keep_2pc),
+ #else
+ size_t(v.min_log_number_to_keep),
+ #endif
+ size_t(v.manifest_file_number),
+ size_t(v.options_file_number),
+ size_t(v.prev_log_number),
+ size_t(v.current_version_number));
+}
+static void PrintFileMetaData(const CompactionParams& cp,
+ FILE* fp, const FileMetaData* f) {
+ Slice temperature = enum_name(f->temperature);
+ std::string lo = html_user_key_decode(cp, f->smallest.user_key());
+ std::string hi = html_user_key_decode(cp, f->largest.user_key());
+ fprintf(fp,
+ " %06zd.sst : entries = %zd, del = %zd, rks = %zd, rvs = %zd, "
+ "fsize = %zd : %zd, temp = %.*s, seq = %zd : %zd, rng = %.*s : %.*s\n",
+ size_t(f->fd.GetNumber()),
+ size_t(f->num_entries), size_t(f->num_deletions),
+ size_t(f->raw_key_size), size_t(f->raw_value_size),
+ size_t(f->fd.file_size), size_t(f->compensated_file_size),
+ int(temperature.size_), temperature.data_,
+ size_t(f->fd.smallest_seqno), size_t(f->fd.largest_seqno),
+ int(lo.size()), lo.data(), int(hi.size()), hi.data());
+}
+
+std::string CompactionParams::DebugString() const {
+ size_t mem_len = 0;
+ char* mem_buf = nullptr;
+ FILE* fp = open_memstream(&mem_buf, &mem_len);
+ fprintf(fp, "job_id = %d, output_level = %d, dbname = %s, cfname = %s\n",
+ job_id, output_level, dbname.c_str(), cf_name.c_str());
+ fprintf(fp, "bottommost_level = %d, compaction_reason = %s\n",
+ bottommost_level, enum_cstr(compaction_reason));
+ fprintf(fp, "smallest_user_key = %s\n", html_user_key_decode(*this, smallest_user_key).c_str());
+ fprintf(fp, "llargest_user_key = %s\n", html_user_key_decode(*this, largest_user_key).c_str());
+ for (size_t i = 0; i < inputs->size(); ++i) {
+ auto& l = inputs->at(i);
+ fprintf(fp, "inputs.size = %zd : %zd : level = %d, size = %3zd\n",
+ inputs->size(), i, l.level, l.size());
+ for (auto fmd : l.files) {
+ PrintFileMetaData(*this, fp, fmd);
+ }
+ }
+ if (grandparents) {
+ fprintf(fp, "grandparents.size = %zd\n", grandparents->size());
+ for (size_t i = 0; i < grandparents->size(); ++i) {
+ FileMetaData* fmd = grandparents->at(i);
+ PrintFileMetaData(*this, fp, fmd);
+ }
+ }
+ else {
+ fprintf(fp, "grandparents = nullptr\n");
+ }
+ if (existing_snapshots) {
+ fprintf(fp, "existing_snapshots.size = %zd\n", existing_snapshots->size());
+ }
+ else {
+ fprintf(fp, "existing_snapshots = nullptr\n");
+ }
+ PrintVersionSetSerDe(fp, version_set);
+ fclose(fp);
+ std::string result(mem_buf, mem_len);
+ free(mem_buf);
+ return result;
+}
+
+// res[0] : raw
+// res[1] : zip
+void CompactionParams::InputBytes(size_t* res) const {
+ size_t raw = 0, zip = 0;
+ for (auto& eachlevel : *inputs) {
+ for (auto& eachfile : eachlevel.files) {
+ zip += eachfile->fd.file_size;
+ raw += eachfile->raw_key_size + eachfile->raw_value_size;
+ }
+ }
+ res[0] = raw;
+ res[1] = zip;
+}
+
+CompactionResults::CompactionResults() {
+ curl_time_usec = 0;
+ work_time_usec = 0;
+ mount_time_usec = 0;
+ prepare_time_usec = 0;
+ waiting_time_usec = 0;
+ output_index_size = 0;
+ output_data_size = 0;
+}
+CompactionResults::~CompactionResults() {}
+
+struct MyVersionSet : VersionSet {
+ void From(const VersionSetSerDe& version_set) {
+ next_file_number_ = version_set.next_file_number;
+ last_sequence_ = version_set.last_sequence;
+ // below are not necessary fields, but we serialize them
+ // for completeness of debugging
+ last_allocated_sequence_ = version_set.last_allocated_sequence;
+ last_published_sequence_ = version_set.last_published_sequence;
+ #if ROCKSDB_MAJOR < 7
+ min_log_number_to_keep_2pc_ = version_set.min_log_number_to_keep_2pc;
+ #else
+ min_log_number_to_keep_ = version_set.min_log_number_to_keep;
+ #endif
+ manifest_file_number_ = version_set.manifest_file_number;
+ options_file_number_ = version_set.options_file_number;
+ //pending_manifest_file_number_ is transient at runtime, do NOT serialize!
+ //pending_manifest_file_number_ = version_set.pending_manifest_file_number;
+ prev_log_number_ = version_set.prev_log_number;
+ current_version_number_ = version_set.current_version_number;
+ }
+ void To(VersionSetSerDe& version_set) const {
+ version_set.next_file_number = next_file_number_;
+ version_set.last_sequence = last_sequence_;
+ // below are not necessary fields, but we serialize them
+ // for completeness of debugging
+ version_set.last_allocated_sequence = last_allocated_sequence_;
+ version_set.last_published_sequence = last_published_sequence_;
+ #if ROCKSDB_MAJOR < 7
+ version_set.min_log_number_to_keep_2pc = min_log_number_to_keep_2pc_;
+ #else
+ version_set.min_log_number_to_keep = min_log_number_to_keep_;
+ #endif
+ version_set.manifest_file_number = manifest_file_number_;
+ version_set.options_file_number = options_file_number_;
+ //pending_manifest_file_number_ is transient at runtime, do NOT serialize!
+ //version_set.pending_manifest_file_number = pending_manifest_file_number_;
+ version_set.prev_log_number = prev_log_number_;
+ version_set.current_version_number = current_version_number_;
+ }
+};
+void VersionSetSerDe::From(const VersionSet* vs) {
+ static_cast(vs)->To(*this); // NOLINT
+}
+void VersionSetSerDe::To(VersionSet* vs) const {
+ static_cast(vs)->From(*this); // NOLINT
+}
+
+CompactionExecutor::~CompactionExecutor() = default;
+CompactionExecutorFactory::~CompactionExecutorFactory() = default;
+
+static bool g_is_compaction_worker = false;
+bool IsCompactionWorker() {
+ return g_is_compaction_worker;
+}
+void SetAsCompactionWorker() {
+ g_is_compaction_worker = true;
+}
+
+/////////////////////////////////////////////////////////////////////////////
+std::string GetDirFromEnv(const char* name, const char* Default) {
+ const char* dir = getenv(name);
+ if (nullptr == dir) {
+ ROCKSDB_VERIFY(nullptr != Default);
+ dir = Default;
+ }
+ size_t dir_name_len = strlen(dir);
+ ROCKSDB_VERIFY(dir_name_len > 0);
+ while (dir_name_len && '/' == dir[dir_name_len-1]) {
+ dir_name_len--;
+ }
+ ROCKSDB_VERIFY(dir_name_len > 0);
+ return std::string(dir, dir_name_len);
+}
+
+bool ReplacePrefix(Slice Old, Slice New, Slice str, std::string* res) {
+ ROCKSDB_VERIFY(Old.size_ > 0);
+ ROCKSDB_VERIFY(New.size_ > 0);
+ while (Old.size_ && Old.data_[Old.size_-1] == '/') {
+ --Old.size_;
+ }
+ while (New.size_ && New.data_[New.size_-1] == '/') {
+ --New.size_;
+ }
+ ROCKSDB_VERIFY(Old.size_ > 0);
+ ROCKSDB_VERIFY(New.size_ > 0);
+ if (str.starts_with(Old)) {
+ size_t suffixLen = str.size_ - Old.size_;
+ res->reserve(New.size_ + suffixLen);
+ res->assign(New.data_, New.size_);
+ res->append(str.data_ + Old.size_, suffixLen);
+ return true;
+ }
+ return false;
+}
+
+std::string ReplacePrefix(Slice Old, Slice New, Slice str) {
+ std::string res;
+ if (ReplacePrefix(Old, New, str, &res)) {
+ return res;
+ }
+ ROCKSDB_DIE("str = '%.*s' does not start with Old='%.*s'",
+ int(str.size()), str.data(), int(Old.size()), Old.data());
+}
+
+void ReplaceAll(std::string& str, Slice from, Slice to) {
+ if (from.empty()) return;
+ size_t start_pos = 0;
+ while ((start_pos = str.find(from.data(), start_pos)) != std::string::npos) {
+ str.replace(start_pos, from.size(), to.data(), to.size());
+ start_pos += to.size();
+ }
+}
+std::string ReplaceAll(Slice str, Slice from, Slice to) {
+ std::string tmp(str.data(), str.size());
+ ReplaceAll(tmp, from, to);
+ return tmp;
+}
+std::string MakePath(std::string dir, Slice sub) {
+ while (!dir.empty() && '/' == dir.back()) {
+ dir.pop_back();
+ }
+ dir.reserve(dir.size() + 1 + sub.size());
+ ROCKSDB_VERIFY(!sub.empty());
+ while (!sub.empty() && '/' == sub[0]) {
+ sub.remove_prefix(1);
+ }
+ ROCKSDB_VERIFY(!sub.empty());
+ dir.push_back('/');
+ dir.append(sub.data(), sub.size());
+ return dir;
+}
+
+std::string& AppendJobID(std::string& dir, int job_id) {
+ while (!dir.empty() && '/' == dir.back()) {
+ dir.pop_back();
+ }
+ char buf[32];
+ dir.append(buf, snprintf(buf, sizeof(buf), "/job-%05d", job_id));
+ return dir;
+}
+std::string CatJobID(const std::string& dir, int job_id) {
+ std::string output_path = dir;
+ AppendJobID(output_path, job_id);
+ return output_path;
+}
+std::string& AppendAttempt(std::string& dir, int attempt) {
+ while (!dir.empty() && '/' == dir.back()) {
+ dir.pop_back();
+ }
+ char buf[32];
+ dir.append(buf, snprintf(buf, sizeof(buf), "/att-%02d", attempt));
+ return dir;
+}
+std::string CatAttempt(const std::string& dir, int attempt) {
+ std::string output_path = dir;
+ AppendAttempt(output_path, attempt);
+ return output_path;
+}
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/db/compaction/compaction_executor.h b/db/compaction/compaction_executor.h
new file mode 100644
index 000000000..8755263ba
--- /dev/null
+++ b/db/compaction/compaction_executor.h
@@ -0,0 +1,185 @@
+//
+// Created by leipeng on 2021/1/11.
+//
+#pragma once
+#include "compaction_job.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+struct ObjectRpcParam {
+ std::string clazz;
+ std::string params; // construction json params
+ typedef std::function serde_fn_t;
+ serde_fn_t serde;
+};
+struct VersionSetSerDe {
+ uint64_t last_sequence;
+ uint64_t last_allocated_sequence;
+ uint64_t last_published_sequence;
+ uint64_t next_file_number;
+ #if ROCKSDB_MAJOR < 7
+ uint64_t min_log_number_to_keep_2pc;
+ #else
+ uint64_t min_log_number_to_keep;
+ #endif
+ uint64_t manifest_file_number;
+ uint64_t options_file_number;
+ //uint64_t pending_manifest_file_number;
+ uint64_t prev_log_number;
+ uint64_t current_version_number;
+ void From(const VersionSet*);
+ void To(VersionSet*) const;
+};
+struct CompactionParams {
+ CompactionParams(const CompactionParams&) = delete;
+ CompactionParams& operator=(const CompactionParams&) = delete;
+ CompactionParams();
+ ~CompactionParams();
+ int job_id;
+ int num_levels;
+ int output_level;
+ uint32_t cf_id;
+ std::string cf_name;
+ const std::vector* inputs = nullptr;
+ VersionSetSerDe version_set;
+ uint64_t target_file_size;
+ uint64_t max_compaction_bytes;
+
+ // we add a dedicated path to compaction worker's cf_path as
+ // output path, thus reduce changes to the existing rocksdb code.
+ // the output_path_id should be the last elem of cf_paths, so the
+ // field output_path_id is not needed.
+ //uint32_t output_path_id; // point to the extra cf_path
+ //std::string output_path; // will append to cfopt.cf_paths on remote node?
+ std::vector cf_paths;
+
+ uint32_t max_subcompactions; // num_threads
+ CompressionType compression;
+ CompressionOptions compression_opts;
+ const std::vector* grandparents = nullptr;
+ double score;
+ bool manual_compaction;
+ bool deletion_compaction;
+ InfoLogLevel compaction_log_level;
+ CompactionReason compaction_reason;
+
+ //VersionSet* version_set;
+ SequenceNumber preserve_deletes_seqnum;
+ const std::vector* existing_snapshots = nullptr;
+ SequenceNumber smallest_seqno;
+ SequenceNumber earliest_write_conflict_snapshot;
+ bool paranoid_file_checks;
+ uint32_t rocksdb_src_version;
+ std::string rocksdb_src_githash;
+ std::string hoster_root;
+ std::string instance_name;
+ std::string dbname;
+ std::string db_id;
+ std::string db_session_id;
+ std::string full_history_ts_low;
+ //CompactionJobStats* compaction_job_stats = nullptr; // this is out param
+ //SnapshotChecker* snapshot_checker; // not used
+ //FSDirectory* db_directory;
+ //FSDirectory* output_directory;
+ //FSDirectory* blob_output_directory;
+
+ std::string smallest_user_key; // serialization must before
+ std::string largest_user_key; // ObjectRpcParam fields
+ //ObjectRpcParam compaction_filter; // don't use compaction_filter
+ ObjectRpcParam compaction_filter_factory; // always use
+ ObjectRpcParam merge_operator;
+ ObjectRpcParam user_comparator;
+ ObjectRpcParam table_factory;
+ ObjectRpcParam prefix_extractor;
+ ObjectRpcParam sst_partitioner_factory;
+ ObjectRpcParam html_user_key_coder;
+
+ //bool skip_filters;
+ bool allow_ingest_behind;
+ bool preserve_deletes;
+ bool bottommost_level;
+ bool is_deserialized;
+ std::vector listeners;
+ std::vector table_properties_collector_factories;
+
+ // CompactionFilterFactory ... can have individual serde files
+ mutable std::vector extra_serde_files;
+ Logger* info_log = nullptr; // do not serialize, just for running process
+ mutable class UserKeyCoder* p_html_user_key_coder = nullptr;
+ const std::atomic* shutting_down = nullptr; // do not serialize
+
+ std::string DebugString() const;
+ void InputBytes(size_t* res) const;
+};
+
+struct CompactionResults {
+ CompactionResults(const CompactionResults&) = delete;
+ CompactionResults& operator=(const CompactionResults&) = delete;
+ CompactionResults();
+ ~CompactionResults();
+ struct FileMinMeta {
+ uint64_t file_number;
+ uint64_t file_size;
+ uint64_t smallest_seqno;
+ uint64_t largest_seqno;
+ InternalKey smallest_ikey;
+ InternalKey largest_ikey;
+ bool marked_for_compaction;
+ };
+ // collect remote statistics
+ struct RawStatistics {
+ uint64_t tickers[INTERNAL_TICKER_ENUM_MAX] = {0};
+ HistogramStat histograms[INTERNAL_HISTOGRAM_ENUM_MAX];
+ };
+
+ std::string output_dir;
+ std::vector > output_files;
+ InternalStats::CompactionStats compaction_stats;
+ CompactionJobStats job_stats;
+ RawStatistics statistics;
+ Status status;
+ size_t curl_time_usec; // set by CompactionExecutor, not worker
+ size_t work_time_usec;
+ size_t mount_time_usec; // mount nfs
+ size_t prepare_time_usec; // open nfs params/results
+ size_t waiting_time_usec; // wait in work queue
+
+ uint64_t output_index_size; // not serialized, just for DB side convenient
+ uint64_t output_data_size; // not serialized, just for DB side convenient
+
+ size_t all_time_usec() const {
+ return curl_time_usec + mount_time_usec + prepare_time_usec + work_time_usec;
+ }
+};
+
+class CompactionExecutor {
+ public:
+ virtual ~CompactionExecutor();
+ virtual void SetParams(CompactionParams*, const Compaction*) = 0;
+ virtual Status Execute(const CompactionParams&, CompactionResults*) = 0;
+ virtual void CleanFiles(const CompactionParams&, const CompactionResults&) = 0;
+};
+
+class CompactionExecutorFactory {
+ public:
+ virtual ~CompactionExecutorFactory();
+ virtual bool ShouldRunLocal(const Compaction*) const = 0;
+ virtual bool AllowFallbackToLocal() const = 0;
+ virtual CompactionExecutor* NewExecutor(const Compaction*) const = 0;
+ virtual const char* Name() const = 0;
+};
+
+/////////////////////////////////////////////////////////////////////////////
+
+std::string GetDirFromEnv(const char* name, const char* Default = nullptr);
+bool ReplacePrefix(Slice Old, Slice New, Slice str, std::string* res);
+std::string ReplacePrefix(Slice Old, Slice New, Slice str);
+void ReplaceAll(std::string& str, Slice from, Slice to);
+std::string ReplaceAll(Slice str, Slice from, Slice to);
+std::string MakePath(std::string dir, Slice sub);
+std::string& AppendJobID(std::string& path, int job_id);
+std::string CatJobID(const std::string& path, int job_id);
+std::string& AppendAttempt(std::string& path, int attempt);
+std::string CatAttempt(const std::string& path, int attempt);
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/db/compaction/compaction_job.cc b/db/compaction/compaction_job.cc
index b914f5e9d..74051b309 100644
--- a/db/compaction/compaction_job.cc
+++ b/db/compaction/compaction_job.cc
@@ -8,6 +8,7 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "db/compaction/compaction_job.h"
+#include "compaction_executor.h"
#include
#include
@@ -44,6 +45,8 @@
#include "port/port.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
+#include "rocksdb/merge_operator.h"
+#include "rocksdb/sst_partitioner.h"
#include "rocksdb/statistics.h"
#include "rocksdb/status.h"
#include "rocksdb/table.h"
@@ -306,6 +309,30 @@ void CompactionJob::GenSubcompactionBoundaries() {
int start_lvl = c->start_level();
int out_lvl = c->output_level();
+ auto try_add_rand_keys = [&](FileMetaData* fmd) {
+ Cache::Handle* ch = fmd->table_reader_handle;
+ if (nullptr == ch)
+ return false;
+ TableCache* tc = cfd->table_cache();
+ TableReader* tr = tc->GetTableReaderFromHandle(ch);
+ std::vector rand_keys;
+ if (tr->GetRandomInteranlKeysAppend(59, &rand_keys) && rand_keys.size()) {
+ rand_keys.push_back(*fmd->smallest.rep());
+ rand_keys.push_back(*fmd->largest.rep());
+ auto icmp = &cfd->internal_comparator();
+ std::sort(rand_keys.begin(), rand_keys.end(),
+ [icmp](Slice x, Slice y) {
+ return icmp->Compare(x, y) < 0;
+ });
+ for (auto& onekey : rand_keys) {
+ bounds.emplace_back(onekey);
+ }
+ rand_key_store_.push_back(std::move(rand_keys));
+ return true;
+ }
+ return false;
+ };
+
// Add the starting and/or ending key of certain input files as a potential
// boundary
for (size_t lvl_idx = 0; lvl_idx < c->num_input_levels(); lvl_idx++) {
@@ -322,6 +349,9 @@ void CompactionJob::GenSubcompactionBoundaries() {
// For level 0 add the starting and ending key of each file since the
// files may have greatly differing key ranges (not range-partitioned)
for (size_t i = 0; i < num_files; i++) {
+ if (try_add_rand_keys(flevel->files[i].file_metadata)) {
+ continue;
+ }
bounds.emplace_back(flevel->files[i].smallest_key);
bounds.emplace_back(flevel->files[i].largest_key);
}
@@ -423,6 +453,23 @@ void CompactionJob::GenSubcompactionBoundaries() {
}
Status CompactionJob::Run() {
+ auto icf_opt = compact_->compaction->immutable_options();
+ auto exec = icf_opt->compaction_executor_factory.get();
+ if (!exec || exec->ShouldRunLocal(compact_->compaction)) {
+ return RunLocal();
+ }
+ Status s = RunRemote();
+ if (!s.ok()) {
+ if (exec->AllowFallbackToLocal()) {
+ s = RunLocal();
+ } else {
+ // fatal, rocksdb does not handle compact errors properly
+ }
+ }
+ return s;
+}
+
+Status CompactionJob::RunLocal() {
AutoThreadOperationStageUpdater stage_updater(
ThreadStatus::STAGE_COMPACTION_RUN);
TEST_SYNC_POINT("CompactionJob::Run():Start");
@@ -459,6 +506,28 @@ Status CompactionJob::Run() {
RecordTimeToHistogram(stats_, COMPACTION_TIME,
compaction_stats_.stats.micros);
+ for (size_t i = 0; i < compact_->sub_compact_states.size(); i++) {
+ auto& sub = compact_->sub_compact_states[i];
+ for (size_t j = 0; j < sub.outputs.size(); ++j) {
+ auto& meta = sub.outputs[j].meta;
+ auto raw = meta.raw_key_size + meta.raw_value_size;
+ auto zip = meta.fd.file_size;
+ RecordTick(stats_, LCOMPACT_WRITE_BYTES_RAW, raw);
+ RecordTimeToHistogram(stats_, LCOMPACTION_OUTPUT_FILE_RAW_SIZE, raw);
+ RecordTimeToHistogram(stats_, LCOMPACTION_OUTPUT_FILE_ZIP_SIZE, zip);
+ }
+ }
+ uint64_t sum_raw = 0, sum_zip = 0;
+ for (auto& each_level : *compact_->compaction->inputs()) {
+ for (FileMetaData* fmd : each_level.files) {
+ sum_raw += fmd->raw_key_size + fmd->raw_value_size;
+ sum_zip += fmd->fd.file_size;
+ }
+ }
+ RecordTimeToHistogram(stats_, LCOMPACTION_INPUT_RAW_BYTES, sum_raw);
+ RecordTimeToHistogram(stats_, LCOMPACTION_INPUT_ZIP_BYTES, sum_zip);
+
+ RecordTimeToHistogram(stats_, COMPACTION_TIME, compaction_stats_.stats.micros);
RecordTimeToHistogram(stats_, COMPACTION_CPU_TIME,
compaction_stats_.stats.cpu_micros);
@@ -613,8 +682,257 @@ Status CompactionJob::Run() {
return status;
}
+void CompactionJob::GetSubCompactOutputs(
+    std::vector<std::vector<const FileMetaData*> >* outputs) const {
+ outputs->clear();
+ outputs->reserve(compact_->sub_compact_states.size());
+ for (const auto& state : compact_->sub_compact_states) {
+ outputs->emplace_back();
+ auto& cur_sub = outputs->back();
+ for (const auto& output : state.outputs) {
+ cur_sub.push_back(&output.meta);
+ }
+ }
+}
+
+Status CompactionJob::RunRemote()
+try {
+ AutoThreadOperationStageUpdater stage_updater(
+ ThreadStatus::STAGE_COMPACTION_RUN);
+ TEST_SYNC_POINT("CompactionJob::RunRemote():Start");
+ log_buffer_->FlushBufferToLog();
+ LogCompaction();
+
+ size_t num_threads = compact_->sub_compact_states.size();
+ assert(num_threads > 0);
+ const Compaction* c = compact_->compaction;
+ ColumnFamilyData* cfd = c->column_family_data();
+ auto imm_cfo = c->immutable_options();
+ auto mut_cfo = c->mutable_cf_options();
+
+ // if with compaction filter, always use compaction filter factory
+ assert(nullptr == imm_cfo->compaction_filter);
+ CompactionParams rpc_params;
+ CompactionResults rpc_results;
+
+ rpc_results.status = Status::Incomplete("Just Created");
+ rpc_params.job_id = job_id_;
+ rpc_params.version_set.From(versions_);
+ #if (ROCKSDB_MAJOR * 10000 + ROCKSDB_MINOR * 10 + ROCKSDB_PATCH) < 70030
+ rpc_params.preserve_deletes_seqnum = preserve_deletes_seqnum_;
+ #endif
+ rpc_params.existing_snapshots = &existing_snapshots_;
+ rpc_params.earliest_write_conflict_snapshot = earliest_write_conflict_snapshot_;
+ rpc_params.paranoid_file_checks = paranoid_file_checks_;
+ rpc_params.dbname = this->dbname_;
+ rpc_params.db_id = this->db_id_;
+ rpc_params.db_session_id = this->db_session_id_;
+ rpc_params.full_history_ts_low = this->full_history_ts_low_;
+//rpc_params.compaction_job_stats = this->compaction_job_stats_;
+ rpc_params.max_subcompactions = uint32_t(num_threads);
+ rpc_params.shutting_down = this->shutting_down_;
+
+ const uint64_t start_micros = env_->NowMicros();
+ auto exec_factory = imm_cfo->compaction_executor_factory.get();
+ assert(nullptr != exec_factory);
+ auto exec = exec_factory->NewExecutor(c);
+  std::unique_ptr<CompactionExecutor> exec_auto_del(exec);
+ exec->SetParams(&rpc_params, c);
+ Status s = exec->Execute(rpc_params, &rpc_results);
+ if (!s.ok()) {
+ compact_->status = s;
+ return s;
+ }
+ if (!rpc_results.status.ok()) {
+ compact_->status = rpc_results.status;
+ return rpc_results.status;
+ }
+ //exec->NotifyResults(&rpc_results, c);
+
+ // remote compact fabricates a version_set, which may cause
+ // GenSubcompactionBoundaries yield different num of sub_compact_states,
+ // thus makes the following assert fail:
+ //assert(rpc_results.output_files.size() == num_threads); // can be diff
+
+ const uint64_t elapsed_us = env_->NowMicros() - start_micros;
+ compaction_stats_.stats = rpc_results.compaction_stats;
+ *compaction_job_stats_ = rpc_results.job_stats;
+
+ // remote statistics will be merged to stat_ later: stats_->Merge(..)
+ //RecordTimeToHistogram(stats_, COMPACTION_TIME, compaction_stats_.micros);
+ //RecordTimeToHistogram(stats_, COMPACTION_CPU_TIME, compaction_stats_.cpu_micros);
+
+ TablePropertiesCollection tp_map;
+ auto& cf_paths = imm_cfo->cf_paths;
+ compact_->num_output_files = 0;
+
+ if (rpc_results.output_files.size() != num_threads) {
+ size_t result_sub_num = rpc_results.output_files.size();
+ // this will happen, but is rare, log it
+ ROCKS_LOG_INFO(db_options_.info_log,
+ "job-%05d: subcompact num diff: rpc = %zd, local = %zd",
+ job_id_, result_sub_num, num_threads);
+ num_threads = result_sub_num;
+ auto& sub_vec = compact_->sub_compact_states;
+ while (sub_vec.size() < result_sub_num) {
+ int sub_job_id = 0;
+ sub_vec.emplace_back(compact_->compaction, nullptr, nullptr, sub_job_id);
+ }
+ while (sub_vec.size() > result_sub_num) {
+ sub_vec.pop_back();
+ }
+ }
+
+ long long rename_t0 = env_->NowMicros();
+ size_t out_raw_bytes = 0;
+ for (size_t i = 0; i < num_threads; ++i) {
+ auto& sub_state = compact_->sub_compact_states[i];
+ for (const auto& min_meta : rpc_results.output_files[i]) {
+ auto old_fnum = min_meta.file_number;
+ auto old_fname = MakeTableFileName(rpc_results.output_dir, old_fnum);
+ auto path_id = c->output_path_id();
+ uint64_t file_number = versions_->NewFileNumber();
+ std::string new_fname = TableFileName(cf_paths, file_number, path_id);
+ Status st = env_->RenameFile(old_fname, new_fname);
+ if (!st.ok()) {
+ ROCKS_LOG_ERROR(db_options_.info_log, "rename(%s, %s) = %s",
+ old_fname.c_str(), new_fname.c_str(), st.ToString().c_str());
+ compact_->status = st;
+ return st;
+ }
+ FileDescriptor fd(file_number, path_id, min_meta.file_size,
+ min_meta.smallest_seqno, min_meta.largest_seqno);
+ TableCache* tc = cfd->table_cache();
+ Cache::Handle* ch = nullptr;
+ auto& icmp = cfd->internal_comparator();
+ auto& fopt = *cfd->soptions(); // file_options
+ #if ROCKSDB_MAJOR < 7
+ auto pref_ext = mut_cfo->prefix_extractor.get();
+ #else
+ auto& pref_ext = mut_cfo->prefix_extractor;
+ #endif
+ st = tc->FindTable(ReadOptions(), fopt, icmp, fd, &ch, pref_ext);
+ if (!st.ok()) {
+ compact_->status = st;
+ return st;
+ }
+ assert(nullptr != ch);
+ TableReader* tr = tc->GetTableReaderFromHandle(ch);
+ auto tp = tr->GetTableProperties();
+ tp_map[new_fname] = tr->GetTableProperties();
+ out_raw_bytes += tp->raw_key_size + tp->raw_value_size;
+ tc->ReleaseHandle(ch); // end use of TableReader in handle
+ FileMetaData meta;
+ meta.fd = fd;
+ meta.smallest = min_meta.smallest_ikey;
+ meta.largest = min_meta.largest_ikey;
+ meta.num_deletions = tp->num_deletions;
+ meta.num_entries = tp->num_entries;
+ meta.raw_key_size = tp->raw_key_size;
+ meta.raw_value_size = tp->raw_value_size;
+ meta.marked_for_compaction = min_meta.marked_for_compaction;
+ bool enable_order_check = mut_cfo->check_flush_compaction_key_order;
+ bool enable_hash = paranoid_file_checks_;
+ uint64_t precalculated_hash = 0;
+ sub_state.outputs.emplace_back(std::move(meta), icmp,
+ enable_order_check, enable_hash, true, precalculated_hash);
+ sub_state.total_bytes += min_meta.file_size;
+ sub_state.num_output_records += tp->num_entries;
+ rpc_results.output_index_size += tp->index_size;
+ rpc_results.output_data_size += tp->data_size;
+ }
+ // instead AggregateStatistics:
+ compact_->num_output_files += sub_state.outputs.size();
+ compact_->total_bytes += sub_state.total_bytes;
+ compact_->num_output_records += sub_state.num_output_records;
+ }
+ compact_->compaction->SetOutputTableProperties(std::move(tp_map));
+ long long rename_t1 = env_->NowMicros();
+
+ {
+ Compaction::InputLevelSummaryBuffer inputs_summary; // NOLINT
+ double work_time_us = rpc_results.work_time_usec;
+ if (work_time_us <= 1) work_time_us = 1;
+ ROCKS_LOG_INFO(db_options_.info_log,
+ "[%s] [JOB %d] Dcompacted %s [%zd] => time sec: "
+ "curl = %6.3f, mount = %6.3f, prepare = %6.3f, "
+ "wait = %6.3f, work = %6.3f, e2e = %6.3f, rename = %6.3f, "
+ "out zip = %9.6f GB %8.3f MB/sec, "
+ "out raw = %9.6f GB %8.3f MB/sec",
+ c->column_family_data()->GetName().c_str(), job_id_,
+ c->InputLevelSummary(&inputs_summary), compact_->num_output_files,
+ rpc_results.curl_time_usec/1e6,
+ rpc_results.mount_time_usec/1e6,
+ rpc_results.prepare_time_usec/1e6,
+ (elapsed_us - work_time_us)/1e6, // wait is non-work
+        work_time_us/1e6, elapsed_us/1e6, (rename_t1 - rename_t0)/1e6,
+ compact_->total_bytes/1e9, compact_->total_bytes/work_time_us,
+ out_raw_bytes/1e9, out_raw_bytes/work_time_us);
+ }
+ // Finish up all book-keeping to unify the subcompaction results
+ // these were run on remote compaction worker node
+ //AggregateStatistics();
+ //UpdateCompactionStats();
+ //compaction_job_stats_->Add(rpc_results.job_stats); // instead AggregateStatistics
+
+ //RecordCompactionIOStats(); // update remote statistics to local -->>
+#if defined(__GNUC__)
+ #pragma GCC diagnostic push
+ #pragma GCC diagnostic ignored "-Wclass-memaccess"
+#endif
+#define MoveHG(dst,src) \
+ memcpy(&rpc_results.statistics.histograms[dst], \
+ &rpc_results.statistics.histograms[src], \
+ sizeof rpc_results.statistics.histograms[src]), \
+ rpc_results.statistics.histograms[src].Clear()
+ MoveHG(DCOMPACTION_INPUT_RAW_BYTES, LCOMPACTION_INPUT_RAW_BYTES);
+ MoveHG(DCOMPACTION_INPUT_ZIP_BYTES, LCOMPACTION_INPUT_ZIP_BYTES);
+ MoveHG(DCOMPACTION_OUTPUT_FILE_RAW_SIZE, LCOMPACTION_OUTPUT_FILE_RAW_SIZE);
+ MoveHG(DCOMPACTION_OUTPUT_FILE_ZIP_SIZE, LCOMPACTION_OUTPUT_FILE_ZIP_SIZE);
+#if defined(__GNUC__)
+ #pragma GCC diagnostic pop
+#endif
+
+#define MoveTK(dst, src) \
+ rpc_results.statistics.tickers[dst] = rpc_results.statistics.tickers[src]; \
+ rpc_results.statistics.tickers[src] = 0
+
+ MoveTK(DCOMPACT_WRITE_BYTES_RAW, LCOMPACT_WRITE_BYTES_RAW);
+ MoveTK(REMOTE_COMPACT_READ_BYTES, COMPACT_READ_BYTES);
+ MoveTK(REMOTE_COMPACT_WRITE_BYTES, COMPACT_WRITE_BYTES);
+
+ stats_->Merge(rpc_results.statistics.tickers,
+ rpc_results.statistics.histograms);
+
+ LogFlush(db_options_.info_log);
+ TEST_SYNC_POINT("CompactionJob::RunRemote():End");
+
+ exec->CleanFiles(rpc_params, rpc_results);
+
+ compact_->status = Status::OK();
+ return Status::OK();
+}
+catch (const std::exception& ex) {
+ compact_->status = Status::Corruption(ROCKSDB_FUNC, ex.what());
+ return compact_->status;
+}
+catch (const Status& s) {
+ compact_->status = s;
+ return s;
+}
+
Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) {
assert(compact_);
+ if (!compact_->status.ok()) { // caller does not check retval of Run()
+ ColumnFamilyData* cfd = compact_->compaction->column_family_data();
+ assert(cfd);
+ ROCKS_LOG_BUFFER(log_buffer_, "[%s] compaction failed, job_id = %d : %s",
+ cfd->GetName().c_str(), job_id_,
+ compact_->status.ToString().c_str());
+ Status s = compact_->status;
+ CleanupCompaction();
+ return s;
+ }
AutoThreadOperationStageUpdater stage_updater(
ThreadStatus::STAGE_COMPACTION_INSTALL);
@@ -1336,6 +1654,10 @@ Status CompactionJob::FinishCompactionOutputFile(
TableProperties tp;
if (s.ok()) {
tp = outputs.GetTableProperties();
+ meta->num_entries = tp.num_entries;
+ meta->num_deletions = tp.num_deletions;
+ meta->raw_key_size = tp.raw_key_size;
+ meta->raw_value_size = tp.raw_value_size;
}
if (s.ok() && current_entries == 0 && tp.num_range_deletions == 0) {
@@ -1777,7 +2099,7 @@ void CompactionJob::LogCompaction() {
ROCKS_LOG_INFO(db_options_.info_log, "[%s] Compaction start summary: %s\n",
cfd->GetName().c_str(), scratch);
// build event logger report
- auto stream = event_logger_->Log();
+ auto stream = event_logger_->LogToBuffer(log_buffer_, 64*1024);
stream << "job" << job_id_ << "event"
<< "compaction_started"
<< "compaction_reason"
diff --git a/db/compaction/compaction_job.h b/db/compaction/compaction_job.h
index 2a342bddf..bd6f38f25 100644
--- a/db/compaction/compaction_job.h
+++ b/db/compaction/compaction_job.h
@@ -189,6 +189,10 @@ class CompactionJob {
// Return the IO status
IOStatus io_status() const { return io_status_; }
+  void GetSubCompactOutputs(std::vector<std::vector<const FileMetaData*> >*) const;
+ CompactionJobStats* GetCompactionJobStats() const { return compaction_job_stats_; }
+ const InternalStats::CompactionStatsFull& GetCompactionStats() const { return compaction_stats_; }
+
protected:
void UpdateCompactionStats();
void LogCompaction();
@@ -250,6 +254,9 @@ class CompactionJob {
void NotifyOnSubcompactionCompleted(SubcompactionState* sub_compact);
+ Status RunLocal();
+ Status RunRemote();
+
uint32_t job_id_;
// DBImpl state
@@ -311,6 +318,8 @@ class CompactionJob {
// zeroed out.
SequenceNumber penultimate_level_cutoff_seqno_ = kMaxSequenceNumber;
+  std::vector<std::vector<std::string> > rand_key_store_;
+
// Get table file name in where it's outputting to, which should also be in
// `output_directory_`.
virtual std::string GetTableFileName(uint64_t file_number);
diff --git a/db/compaction/compaction_outputs.h b/db/compaction/compaction_outputs.h
index 635924989..f1f2b6b67 100644
--- a/db/compaction/compaction_outputs.h
+++ b/db/compaction/compaction_outputs.h
@@ -190,6 +190,8 @@ class CompactionOutputs {
return range_del_agg_ && !range_del_agg_->IsEmpty();
}
+ std::vector