diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 00000000..a6fa563c
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,4 @@
+[submodule "ThirdParty/zstd"]
+ path = ThirdParty/zstd
+ url = https://github.com/facebook/zstd
+ branch = release
diff --git a/AnnService/Aggregator.vcxproj b/AnnService/Aggregator.vcxproj
index af24dded..4946c13f 100644
--- a/AnnService/Aggregator.vcxproj
+++ b/AnnService/Aggregator.vcxproj
@@ -165,6 +165,7 @@
+
@@ -177,5 +178,6 @@
+
\ No newline at end of file
diff --git a/AnnService/CMakeLists.txt b/AnnService/CMakeLists.txt
index a61b08a3..470f7d65 100644
--- a/AnnService/CMakeLists.txt
+++ b/AnnService/CMakeLists.txt
@@ -2,8 +2,10 @@
# Licensed under the MIT License.
set(AnnService ${PROJECT_SOURCE_DIR}/AnnService)
+set(Zstd ${PROJECT_SOURCE_DIR}/ThirdParty/zstd)
include_directories(${AnnService})
+include_directories(${Zstd}/lib)
file(GLOB_RECURSE HDR_FILES ${AnnService}/inc/Core/*.h ${AnnService}/inc/Helper/*.h)
file(GLOB_RECURSE SRC_FILES ${AnnService}/src/Core/*.cpp ${AnnService}/src/Helper/*.cpp)
@@ -32,9 +34,9 @@ if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU")
endif()
add_library (SPTAGLib SHARED ${SRC_FILES} ${HDR_FILES})
-target_link_libraries (SPTAGLib DistanceUtils)
+target_link_libraries (SPTAGLib DistanceUtils libzstd_shared)
add_library (SPTAGLibStatic STATIC ${SRC_FILES} ${HDR_FILES})
-target_link_libraries (SPTAGLibStatic DistanceUtils)
+target_link_libraries (SPTAGLibStatic DistanceUtils libzstd_static)
if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU")
target_compile_options(SPTAGLibStatic PRIVATE -fPIC)
endif()
diff --git a/AnnService/Client.vcxproj b/AnnService/Client.vcxproj
index f88234be..9381af59 100644
--- a/AnnService/Client.vcxproj
+++ b/AnnService/Client.vcxproj
@@ -132,6 +132,7 @@
+
@@ -144,5 +145,6 @@
+
\ No newline at end of file
diff --git a/AnnService/CoreLibrary.vcxproj b/AnnService/CoreLibrary.vcxproj
index 386c0f38..4cd347d3 100644
--- a/AnnService/CoreLibrary.vcxproj
+++ b/AnnService/CoreLibrary.vcxproj
@@ -160,6 +160,7 @@
+
@@ -219,5 +220,12 @@
+
+
+
+ This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.
+
+
+
\ No newline at end of file
diff --git a/AnnService/CoreLibrary.vcxproj.filters b/AnnService/CoreLibrary.vcxproj.filters
index 453a4795..f260b078 100644
--- a/AnnService/CoreLibrary.vcxproj.filters
+++ b/AnnService/CoreLibrary.vcxproj.filters
@@ -214,6 +214,9 @@
Header Files\Core\Common
+
+ Header Files\Core\SPANN
+
diff --git a/AnnService/IndexBuilder.vcxproj b/AnnService/IndexBuilder.vcxproj
index f82825fa..0900590c 100644
--- a/AnnService/IndexBuilder.vcxproj
+++ b/AnnService/IndexBuilder.vcxproj
@@ -155,6 +155,7 @@
+
@@ -167,5 +168,6 @@
+
\ No newline at end of file
diff --git a/AnnService/IndexSearcher.vcxproj b/AnnService/IndexSearcher.vcxproj
index 88214858..6d137837 100644
--- a/AnnService/IndexSearcher.vcxproj
+++ b/AnnService/IndexSearcher.vcxproj
@@ -156,6 +156,7 @@
+
@@ -168,5 +169,6 @@
+
\ No newline at end of file
diff --git a/AnnService/Quantizer.vcxproj b/AnnService/Quantizer.vcxproj
index bdcebcd0..942e55e1 100644
--- a/AnnService/Quantizer.vcxproj
+++ b/AnnService/Quantizer.vcxproj
@@ -171,6 +171,7 @@
+
@@ -183,5 +184,6 @@
+
\ No newline at end of file
diff --git a/AnnService/SSDServing.vcxproj b/AnnService/SSDServing.vcxproj
index bf37ca74..f6676597 100644
--- a/AnnService/SSDServing.vcxproj
+++ b/AnnService/SSDServing.vcxproj
@@ -1,4 +1,4 @@
-
+
@@ -27,6 +27,9 @@
+
+
+
15.0
{217B42B7-8F2B-4323-804C-08992CA2F65E}
@@ -177,6 +180,7 @@
+
@@ -189,5 +193,6 @@
+
\ No newline at end of file
diff --git a/AnnService/SSDServing.vcxproj.filters b/AnnService/SSDServing.vcxproj.filters
index 8f36f5f3..95d60611 100644
--- a/AnnService/SSDServing.vcxproj.filters
+++ b/AnnService/SSDServing.vcxproj.filters
@@ -27,4 +27,7 @@
Source Files
+
+
+
\ No newline at end of file
diff --git a/AnnService/Server.vcxproj b/AnnService/Server.vcxproj
index fe9a7c8d..3b38afe4 100644
--- a/AnnService/Server.vcxproj
+++ b/AnnService/Server.vcxproj
@@ -140,6 +140,7 @@
+
@@ -152,5 +153,6 @@
+
\ No newline at end of file
diff --git a/AnnService/inc/Core/SPANN/Compressor.h b/AnnService/inc/Core/SPANN/Compressor.h
new file mode 100644
index 00000000..86100608
--- /dev/null
+++ b/AnnService/inc/Core/SPANN/Compressor.h
@@ -0,0 +1,181 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#ifndef _SPTAG_SPANN_COMPRESSOR_H_
+#define _SPTAG_SPANN_COMPRESSOR_H_
+
+#include
+#include "zstd.h"
+#include "zdict.h"
+#include "../Common.h"
+
+namespace SPTAG
+{
+ namespace SPANN
+ {
+        // Wrapper around the zstd one-shot compression API, with optional
+        // dictionary support.
+        //
+        // Dictionary life cycle as implemented here:
+        //   * TrainDict() fills dictBuffer and builds the compression dictionary
+        //     (cdict) only.
+        //   * SetDictBuffer() replaces dictBuffer and builds the decompression
+        //     dictionary (ddict) only.
+        // The object owns cdict/ddict and releases them in its destructor.
+        //
+        // Error policy (unchanged from the original implementation): failures on
+        // the training/compression side log and call exit(1); failures on the
+        // decompression side log and throw std::runtime_error.
+        class Compressor
+        {
+        private:
+            // Frees the compression dictionary if one exists.
+            void ReleaseCDict()
+            {
+                if (cdict != nullptr)
+                {
+                    ZSTD_freeCDict(cdict);
+                    cdict = nullptr;
+                }
+            }
+
+            // Frees the decompression dictionary if one exists.
+            void ReleaseDDict()
+            {
+                if (ddict != nullptr)
+                {
+                    ZSTD_freeDDict(ddict);
+                    ddict = nullptr;
+                }
+            }
+
+            // (Re)builds cdict from the current dictBuffer and compress_level.
+            // Any previously created dictionary is released first so repeated
+            // calls do not leak. Logs and exits the process on failure.
+            void CreateCDict()
+            {
+                ReleaseCDict();
+                cdict = ZSTD_createCDict(dictBuffer.data(), dictBuffer.size(), compress_level);
+                if (cdict == nullptr)
+                {
+                    LOG(Helper::LogLevel::LL_Error, "ZSTD_createCDict() failed! \n");
+                    exit(1);
+                }
+            }
+
+            // (Re)builds ddict from the current dictBuffer. Any previously
+            // created dictionary is released first so repeated calls do not leak.
+            // Throws std::runtime_error on failure.
+            void CreateDDict()
+            {
+                ReleaseDDict();
+                ddict = ZSTD_createDDict(dictBuffer.data(), dictBuffer.size());
+                if (ddict == nullptr)
+                {
+                    LOG(Helper::LogLevel::LL_Error, "ZSTD_createDDict() failed! \n");
+                    throw std::runtime_error("ZSTD_createDDict() failed!");
+                }
+            }
+
+            // Compresses src with cdict and returns the compressed bytes.
+            // Logs and exits the process if zstd reports an error.
+            std::string CompressWithDict(const std::string &src)
+            {
+                const size_t est_compress_size = ZSTD_compressBound(src.size());
+                std::string comp_buffer(est_compress_size, '\0');
+
+                ZSTD_CCtx *const cctx = ZSTD_createCCtx();
+                if (cctx == nullptr)
+                {
+                    LOG(Helper::LogLevel::LL_Error, "ZSTD_createCCtx() failed! \n");
+                    exit(1);
+                }
+                // &comp_buffer[0] yields a writable pointer without casting away const.
+                const size_t compressed_size = ZSTD_compress_usingCDict(cctx, &comp_buffer[0], est_compress_size, src.data(), src.size(), cdict);
+                // The context is no longer needed whether or not compression succeeded.
+                ZSTD_freeCCtx(cctx);
+                if (ZSTD_isError(compressed_size))
+                {
+                    LOG(Helper::LogLevel::LL_Error, "ZSTD compress error %s, \n", ZSTD_getErrorName(compressed_size));
+                    exit(1);
+                }
+                comp_buffer.resize(compressed_size);
+                comp_buffer.shrink_to_fit();
+
+                return comp_buffer;
+            }
+
+            // Decompresses srcSize bytes at src into dst (at most dstCapacity
+            // bytes) using ddict. Returns the number of bytes written.
+            // Throws std::runtime_error if the context cannot be created or zstd
+            // reports an error.
+            std::size_t DecompressWithDict(const char* src, size_t srcSize, char* dst, size_t dstCapacity)
+            {
+                ZSTD_DCtx* const dctx = ZSTD_createDCtx();
+                if (dctx == nullptr)
+                {
+                    LOG(Helper::LogLevel::LL_Error, "ZSTD_createDCtx() failed! \n");
+                    throw std::runtime_error("ZSTD_createDCtx() failed!");
+                }
+                std::size_t const decomp_size = ZSTD_decompress_usingDDict(dctx,
+                    dst, dstCapacity, src, srcSize, ddict);
+                // Release the context before the error check so the throwing
+                // path below does not leak it.
+                ZSTD_freeDCtx(dctx);
+                if (ZSTD_isError(decomp_size))
+                {
+                    LOG(Helper::LogLevel::LL_Error, "ZSTD decompress error %s, \n", ZSTD_getErrorName(decomp_size));
+                    throw std::runtime_error("ZSTD decompress failed.");
+                }
+                return decomp_size;
+            }
+
+            // Compresses src at compress_level without a dictionary and returns
+            // the compressed bytes. Logs and exits the process on error.
+            std::string CompressWithoutDict(const std::string &src)
+            {
+                const size_t est_comp_size = ZSTD_compressBound(src.size());
+                std::string buffer(est_comp_size, '\0');
+                const size_t compressed_size = ZSTD_compress(&buffer[0], est_comp_size,
+                    src.data(), src.size(), compress_level);
+                if (ZSTD_isError(compressed_size))
+                {
+                    LOG(Helper::LogLevel::LL_Error, "ZSTD compress error %s, \n", ZSTD_getErrorName(compressed_size));
+                    exit(1);
+                }
+                buffer.resize(compressed_size);
+                buffer.shrink_to_fit();
+
+                return buffer;
+            }
+
+            // Decompresses srcSize bytes at src into dst (at most dstCapacity
+            // bytes) without a dictionary. Returns the number of bytes written.
+            // Throws std::runtime_error if zstd reports an error.
+            std::size_t DecompressWithoutDict(const char *src, size_t srcSize, char* dst, size_t dstCapacity)
+            {
+                std::size_t const decomp_size = ZSTD_decompress(
+                    dst, dstCapacity, src, srcSize);
+                if (ZSTD_isError(decomp_size))
+                {
+                    LOG(Helper::LogLevel::LL_Error, "ZSTD decompress error %s, \n", ZSTD_getErrorName(decomp_size));
+                    throw std::runtime_error("ZSTD decompress failed.");
+                }
+
+                return decomp_size;
+            }
+
+        public:
+            // level:          zstd compression level passed to ZSTD_compress /
+            //                 ZSTD_createCDict.
+            // bufferCapacity: size in bytes of the buffer handed to the
+            //                 dictionary trainer in TrainDict().
+            Compressor(int level = 0, int bufferCapacity = 102400)
+                : compress_level(level),
+                  dictBufferCapacity(static_cast<size_t>(bufferCapacity)),
+                  cdict(nullptr),
+                  ddict(nullptr)
+            {
+            }
+
+            // Deep copy: the dictionaries are owned by each instance, so the copy
+            // rebuilds its own cdict/ddict from the copied dictBuffer instead of
+            // sharing raw pointers (which would be freed twice).
+            Compressor(const Compressor &other)
+                : compress_level(other.compress_level),
+                  dictBuffer(other.dictBuffer),
+                  dictBufferCapacity(other.dictBufferCapacity),
+                  cdict(nullptr),
+                  ddict(nullptr)
+            {
+                if (other.cdict != nullptr) CreateCDict();
+                if (other.ddict != nullptr) CreateDDict();
+            }
+
+            // Deep-copy assignment; see the copy constructor.
+            Compressor &operator=(const Compressor &other)
+            {
+                if (this != &other)
+                {
+                    compress_level = other.compress_level;
+                    dictBufferCapacity = other.dictBufferCapacity;
+                    dictBuffer = other.dictBuffer;
+
+                    if (other.cdict != nullptr) CreateCDict(); else ReleaseCDict();
+                    if (other.ddict != nullptr) CreateDDict(); else ReleaseDDict();
+                }
+                return *this;
+            }
+
+            // Releases the dictionaries owned by this instance.
+            virtual ~Compressor()
+            {
+                ReleaseCDict();
+                ReleaseDDict();
+            }
+
+            // Trains a dictionary from nbSamples samples stored back to back in
+            // samplesBuffer, where samplesSizes[i] is the size of sample i, then
+            // builds the compression dictionary from it.
+            // Returns the size in bytes of the trained dictionary.
+            // Logs and exits the process if training fails.
+            std::size_t TrainDict(const std::string &samplesBuffer, const size_t *samplesSizes, unsigned nbSamples)
+            {
+                dictBuffer.resize(dictBufferCapacity);
+                const size_t dictSize = ZDICT_trainFromBuffer(&dictBuffer[0], dictBufferCapacity, samplesBuffer.data(), samplesSizes, nbSamples);
+                if (ZDICT_isError(dictSize))
+                {
+                    LOG(Helper::LogLevel::LL_Error, "ZDICT_trainFromBuffer() failed: %s \n", ZDICT_getErrorName(dictSize));
+                    exit(1);
+                }
+                dictBuffer.resize(dictSize);
+                dictBuffer.shrink_to_fit();
+
+                CreateCDict();
+
+                return dictSize;
+            }
+
+            // Returns a copy of the raw dictionary bytes currently held.
+            std::string GetDictBuffer() const
+            {
+                return dictBuffer;
+            }
+
+            // Replaces the dictionary bytes with buffer and builds the
+            // decompression dictionary from them.
+            // Throws std::runtime_error if the dictionary cannot be created.
+            void SetDictBuffer(const std::string &buffer)
+            {
+                dictBuffer = buffer;
+                CreateDDict();
+            }
+
+            // Compresses src, with the trained dictionary when useDict is true.
+            std::string Compress(const std::string &src, const bool useDict)
+            {
+                return useDict ? CompressWithDict(src) : CompressWithoutDict(src);
+            }
+
+            // Decompresses src into dst and returns the number of bytes written;
+            // uses the loaded dictionary when useDict is true.
+            // Throws std::runtime_error on failure.
+            std::size_t Decompress(const char *src, size_t srcSize, char* dst, size_t dstCapacity, const bool useDict)
+            {
+                return useDict ? DecompressWithDict(src, srcSize, dst, dstCapacity) : DecompressWithoutDict(src, srcSize, dst, dstCapacity);
+            }
+
+            // Returns the compressed size of src. Note that this performs a full
+            // compression and discards the output.
+            size_t GetCompressedSize(const std::string &src, bool useDict)
+            {
+                return Compress(src, useDict).size();
+            }
+
+        private:
+            int compress_level;
+
+            std::string dictBuffer;
+            size_t dictBufferCapacity;
+            ZSTD_CDict *cdict; // owned; built by TrainDict()
+            ZSTD_DDict *ddict; // owned; built by SetDictBuffer()
+        };
+ } // SPANN
+} // SPTAG
+
+#endif // _SPTAG_SPANN_COMPRESSOR_H_
diff --git a/AnnService/inc/Core/SPANN/ExtraFullGraphSearcher.h b/AnnService/inc/Core/SPANN/ExtraFullGraphSearcher.h
index 0afb0eb7..815530b0 100644
--- a/AnnService/inc/Core/SPANN/ExtraFullGraphSearcher.h
+++ b/AnnService/inc/Core/SPANN/ExtraFullGraphSearcher.h
@@ -8,11 +8,13 @@
#include "inc/Helper/AsyncFileReader.h"
#include "IExtraSearcher.h"
#include "../Common/TruthSet.h"
+#include "Compressor.h"
#include