From 8a059f144f84af00b021ce81b8936040152591fc Mon Sep 17 00:00:00 2001 From: cheqi Date: Fri, 11 Jan 2019 11:16:56 +0800 Subject: [PATCH 01/14] remove dup code --- AnnService/CoreLibrary.vcxproj | 5 + AnnService/CoreLibrary.vcxproj.filters | 18 + AnnService/inc/Core/BKT/Index.h | 312 +--- .../inc/Core/BKT/ParameterDefinitionList.h | 40 +- AnnService/inc/Core/Common/BKTree.h | 461 ++++++ AnnService/inc/Core/Common/Dataset.h | 136 +- AnnService/inc/Core/Common/KDTree.h | 327 ++++ .../inc/Core/Common/NeighborhoodGraph.h | 408 +++++ AnnService/inc/Core/Common/QueryResultSet.h | 2 +- .../Core/Common/RelativeNeighborhoodGraph.h | 120 ++ AnnService/inc/Core/Common/WorkSpace.h | 43 - AnnService/inc/Core/KDT/Index.h | 175 +-- .../inc/Core/KDT/ParameterDefinitionList.h | 38 +- AnnService/inc/Core/MetadataSet.h | 4 +- AnnService/inc/Core/VectorIndex.h | 31 +- AnnService/src/Core/BKT/BKTIndex.cpp | 1321 ++--------------- .../src/Core/Common/NeighborhoodGraph.cpp | 14 + AnnService/src/Core/KDT/KDTIndex.cpp | 1221 ++------------- AnnService/src/Core/MetadataSet.cpp | 31 +- AnnService/src/Core/VectorIndex.cpp | 153 +- AnnService/src/IndexBuilder/main.cpp | 2 +- PythonWrapper/PythonCore.vcxproj | 6 +- PythonWrapper/inc/CoreInterface.h | 2 - PythonWrapper/packages.config | 2 + PythonWrapper/src/CoreInterface.cpp | 6 - 25 files changed, 2074 insertions(+), 2804 deletions(-) create mode 100644 AnnService/inc/Core/Common/BKTree.h create mode 100644 AnnService/inc/Core/Common/KDTree.h create mode 100644 AnnService/inc/Core/Common/NeighborhoodGraph.h create mode 100644 AnnService/inc/Core/Common/RelativeNeighborhoodGraph.h create mode 100644 AnnService/src/Core/Common/NeighborhoodGraph.cpp diff --git a/AnnService/CoreLibrary.vcxproj b/AnnService/CoreLibrary.vcxproj index 9844c709..a0e884b3 100644 --- a/AnnService/CoreLibrary.vcxproj +++ b/AnnService/CoreLibrary.vcxproj @@ -157,9 +157,14 @@ + + + + + diff --git a/AnnService/CoreLibrary.vcxproj.filters 
b/AnnService/CoreLibrary.vcxproj.filters index 7d27224d..c411e8ce 100644 --- a/AnnService/CoreLibrary.vcxproj.filters +++ b/AnnService/CoreLibrary.vcxproj.filters @@ -118,6 +118,18 @@ Header Files\Core\Common + + Header Files\Core\Common + + + Header Files\Core\Common + + + Header Files\Core\Common + + + Header Files\Core\Common + @@ -156,5 +168,11 @@ Source Files\Core\KDT + + Source Files\Core\Common + + + + \ No newline at end of file diff --git a/AnnService/inc/Core/BKT/Index.h b/AnnService/inc/Core/BKT/Index.h index c14aa815..2ead4acd 100644 --- a/AnnService/inc/Core/BKT/Index.h +++ b/AnnService/inc/Core/BKT/Index.h @@ -1,267 +1,109 @@ #ifndef _SPTAG_BKT_INDEX_H_ #define _SPTAG_BKT_INDEX_H_ -#include "../SearchQuery.h" -#include "../VectorIndex.h" #include "../Common.h" +#include "../VectorIndex.h" #include "../Common/CommonUtils.h" #include "../Common/DistanceUtils.h" #include "../Common/QueryResultSet.h" -#include "../Common/Heap.h" #include "../Common/Dataset.h" #include "../Common/WorkSpace.h" #include "../Common/WorkSpacePool.h" -#include "../Common/FineGrainedLock.h" -#include "../Common/DataUtils.h" +#include "../Common/RelativeNeighborhoodGraph.h" +#include "../Common/BKTree.h" +#include "inc/Helper/SimpleIniReader.h" +#include "inc/Helper/StringConvert.h" #include #include -#include #include namespace SPTAG { -namespace Helper -{ -class IniReader; -} - - -namespace BKT -{ - // node type for storing BKT - struct BKTNode + namespace Helper { - int centerid; - int childStart; - int childEnd; - - BKTNode(int cid = -1) : centerid(cid), childStart(-1), childEnd(-1) {} - }; - - template - struct KmeansArgs { - int _K; - int _D; - int _T; - T* centers; - int* counts; - float* newCenters; - int* newCounts; - char* label; - int* clusterIdx; - float* clusterDist; - T* newTCenters; - - KmeansArgs(int k, int dim, int datasize, int threadnum): _K(k), _D(dim), _T(threadnum) { - centers = new T[k * dim]; - counts = new int[k]; - newCenters = new float[threadnum * k * 
dim]; - newCounts = new int[threadnum * k]; - label = new char[datasize]; - clusterIdx = new int[threadnum * k]; - clusterDist = new float[threadnum * k]; - newTCenters = new T[k * dim]; - } - - ~KmeansArgs() { - delete[] centers; - delete[] counts; - delete[] newCenters; - delete[] newCounts; - delete[] label; - delete[] clusterIdx; - delete[] clusterDist; - delete[] newTCenters; - } - - inline void ClearCounts() { - memset(newCounts, 0, sizeof(int) * _T * _K); - } - - inline void ClearCenters() { - memset(newCenters, 0, sizeof(float) * _T * _K * _D); - } - - inline void ClearDists(float dist) { - for (int i = 0; i < _T * _K; i++) { - clusterIdx[i] = -1; - clusterDist[i] = dist; - } - } - - void Shuffle(std::vector& indices, int first, int last) { - int* pos = new int[_K]; - pos[0] = first; - for (int k = 1; k < _K; k++) pos[k] = pos[k - 1] + newCounts[k - 1]; - - for (int k = 0; k < _K; k++) { - if (newCounts[k] == 0) continue; - int i = pos[k]; - while (newCounts[k] > 0) { - int swapid = pos[(int)(label[i])] + newCounts[(int)(label[i])] - 1; - newCounts[(int)(label[i])]--; - std::swap(indices[i], indices[swapid]); - std::swap(label[i], label[swapid]); - } - while (indices[i] != clusterIdx[k]) i++; - std::swap(indices[i], indices[pos[k] + counts[k] - 1]); - } - delete[] pos; - } - }; + class IniReader; + } - template - class Index : public VectorIndex + namespace BKT { - private: - // Initial data points - int m_iDataSize; - int m_iDataDimension; - COMMON::Dataset m_pSamples; + template + class Index : public VectorIndex + { + private: + // data points + COMMON::Dataset m_pSamples; - // BKT structures. - int m_iBKTNumber; - std::vector m_pBKTStart; - std::vector m_pBKTRoots; + // BKT structures. 
+ COMMON::BKTree m_pTrees; - // Graph structure - int m_iGraphSize; - int m_iNeighborhoodSize; - COMMON::Dataset m_pNeighborhoodGraph; + // Graph structure + COMMON::RelativeNeighborhoodGraph m_pGraph; - // Variables for building BKTs and TPTs - int m_iBKTKmeansK; - int m_iBKTLeafSize; - int m_iSamples; - int m_iTptreeNumber; - int m_iTPTLeafSize; - int m_numTopDimensionTpTreeSplit; + std::string m_sBKTFilename; + std::string m_sGraphFilename; + std::string m_sDataPointsFilename; - // Variables for building graph - int m_iRefineIter; - int m_iCEF; - int m_iMaxCheckForRefineGraph; - int m_iMaxCheck; - std::unordered_map m_pSampleToCenter; + std::mutex m_dataLock; // protect data and graph + tbb::concurrent_unordered_set m_deletedID; + std::unique_ptr m_workSpacePool; - // Load from files directly - std::string m_sBKTFilename; - std::string m_sGraphFilename; - std::string m_sDataPointsFilename; - - // Load from memory mapped files - char* m_pBKTMemoryFile; - char* m_pGraphMemoryFile; - char* m_pDataPointsMemoryFile; - - DistCalcMethod m_iDistCalcMethod; - float(*m_fComputeDistance)(const T* pX, const T* pY, int length); - - int m_iCacheSize; - int m_iDebugLoad; - - int g_iThresholdOfNumberOfContinuousNoBetterPropagation; - int g_iNumberOfInitialDynamicPivots; - int g_iNumberOfOtherDynamicPivots; - - int m_iNumberOfThreads; - std::mutex m_dataAllocLock; - COMMON::FineGrainedLock m_dataUpdateLock; - tbb::concurrent_unordered_set m_deletedID; - std::unique_ptr m_workSpacePool; - public: - Index() : m_iBKTNumber(1), - m_iBKTKmeansK(32), - m_iBKTLeafSize(8), - m_iSamples(1000), - m_iNeighborhoodSize(32), - m_iTptreeNumber(32), - m_iTPTLeafSize(2000), - m_numTopDimensionTpTreeSplit(5), - m_iRefineIter(0), - m_iCEF(1000), - m_iMaxCheckForRefineGraph(10000), - m_iMaxCheck(2048), - m_pBKTMemoryFile(NULL), - m_pGraphMemoryFile(NULL), - m_pDataPointsMemoryFile(NULL), - m_sBKTFilename("tree.bin"), - m_sGraphFilename("graph.bin"), - m_sDataPointsFilename("vectors.bin"), - 
m_iNumberOfThreads(1), - m_iDistCalcMethod(DistCalcMethod::Cosine), - m_fComputeDistance(COMMON::DistanceCalcSelector(DistCalcMethod::Cosine)), - m_iCacheSize(-1), - m_iDebugLoad(-1), - g_iThresholdOfNumberOfContinuousNoBetterPropagation(3), - g_iNumberOfInitialDynamicPivots(50), - g_iNumberOfOtherDynamicPivots(4) {} - - ~Index() { - m_pBKTRoots.clear(); - } - int GetNumSamples() const { return m_pSamples.R(); } - int GetFeatureDim() const { return m_pSamples.C(); } - int GetNumThreads() const { return m_iNumberOfThreads; } - int GetCurrMaxCheck() const { return m_iMaxCheck; } - - DistCalcMethod GetDistCalcMethod() const { return m_iDistCalcMethod; } - IndexAlgoType GetIndexAlgoType() const { return IndexAlgoType::BKT; } - VectorValueType GetVectorValueType() const { return GetEnumValueType(); } - - ErrorCode BuildIndex(const void* p_data, int p_vectorNum, int p_dimension); + int m_iNumberOfThreads; + DistCalcMethod m_iDistCalcMethod; + float(*m_fComputeDistance)(const T* pX, const T* pY, int length); - ErrorCode LoadIndex(const std::string& p_folderPath); - ErrorCode LoadIndexFromMemory(const std::vector& p_indexBlobs); - - ErrorCode SaveIndex(const std::string& p_folderPath); - - void SearchIndex(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space, const tbb::concurrent_unordered_set &p_deleted) const; - ErrorCode SearchIndex(QueryResult &p_query) const; - - ErrorCode AddIndex(const void* p_vectors, int p_vectorNum, int p_dimension); - ErrorCode DeleteIndex(const void* p_vectors, int p_vectorNum); - ErrorCode RefineIndex(const std::string& p_folderPath); - ErrorCode MergeIndex(const char* p_indexFilePath1, const char* p_indexFilePath2); + int m_iMaxCheck; + int m_iThresholdOfNumberOfContinuousNoBetterPropagation; + int m_iNumberOfInitialDynamicPivots; + int m_iNumberOfOtherDynamicPivots; + public: + Index() : + m_sBKTFilename("tree.bin"), + m_sGraphFilename("graph.bin"), + m_sDataPointsFilename("vectors.bin"), + m_iNumberOfThreads(1), + 
m_iDistCalcMethod(DistCalcMethod::Cosine), + m_fComputeDistance(COMMON::DistanceCalcSelector(DistCalcMethod::Cosine)), + m_iMaxCheck(2048), + m_iThresholdOfNumberOfContinuousNoBetterPropagation(3), + m_iNumberOfInitialDynamicPivots(50), + m_iNumberOfOtherDynamicPivots(4) {} + + ~Index() {} + + inline int GetNumSamples() const { return m_pSamples.R(); } + inline int GetFeatureDim() const { return m_pSamples.C(); } - ErrorCode SetParameter(const char* p_param, const char* p_value); - std::string GetParameter(const char* p_param) const; - - private: - // Functions for loading models from files - bool LoadDataPoints(std::string sDataPointsFileName); - bool LoadBKT(std::string sBKTFilename); - bool LoadGraph(std::string sGraphFilename); - - // Functions for loading models from memory mapped files - bool LoadDataPoints(char* pDataPointsMemFile); - bool LoadBKT(char* pBKTMemFile); - bool LoadGraph(char* pGraphMemFile); - - bool SaveDataPoints(std::string sDataPointsFileName); - - // Functions for building balanced kmeans tree - void BuildBKT(std::vector& indices, std::vector& newStart, std::vector& newRoot); - bool SaveBKT(std::string sBKTFilename, std::vector& newStart, std::vector& newRoot) const; - float KmeansAssign(std::vector& indices, const int first, const int last, KmeansArgs& args, bool updateCenters); - int KmeansClustering(std::vector& indices, const int first, const int last, KmeansArgs& args); - - // Functions for building Graph - void BuildRNG(); - bool SaveRNG(std::string sGraphFilename) const; - void PartitionByTptree(std::vector &indices, - const int first, - const int last, - std::vector> &leaves); - void RefineRNG(); - void RefineRNGNode(const int node, COMMON::WorkSpace &space, bool updateNeighbors); - void RebuildRNGNodeNeighbors(int* nodes, const BasicResult* queryResults, int numResults); - float GraphAccuracyEstimation(int NSample, bool rng); - }; -} // namespace BKT + inline int GetCurrMaxCheck() const { return m_iMaxCheck; } + inline int 
GetNumThreads() const { return m_iNumberOfThreads; } + inline DistCalcMethod GetDistCalcMethod() const { return m_iDistCalcMethod; } + inline IndexAlgoType GetIndexAlgoType() const { return IndexAlgoType::BKT; } + inline VectorValueType GetVectorValueType() const { return GetEnumValueType(); } + + inline float ComputeDistance(const void* pX, const void* pY) const { return m_fComputeDistance((const T*)pX, (const T*)pY, m_pSamples.C()); } + inline const void* GetSample(const int idx) const { return (void*)m_pSamples[idx]; } + + ErrorCode BuildIndex(const void* p_data, int p_vectorNum, int p_dimension); + + ErrorCode LoadIndexFromMemory(const std::vector& p_indexBlobs); + + ErrorCode SaveIndex(const std::string& p_folderPath, std::ofstream& p_configout); + ErrorCode LoadIndex(const std::string& p_folderPath, Helper::IniReader& p_reader); + ErrorCode SearchIndex(QueryResult &p_query) const; + ErrorCode AddIndex(const void* p_vectors, int p_vectorNum, int p_dimension); + ErrorCode DeleteIndex(const void* p_vectors, int p_vectorNum); + + ErrorCode SetParameter(const char* p_param, const char* p_value); + std::string GetParameter(const char* p_param) const; + + private: + ErrorCode RefineIndex(const std::string& p_folderPath); + void SearchIndexWithDeleted(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space, const tbb::concurrent_unordered_set &p_deleted) const; + void SearchIndexWithoutDeleted(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space) const; + }; + } // namespace BKT } // namespace SPTAG #endif // _SPTAG_BKT_INDEX_H_ diff --git a/AnnService/inc/Core/BKT/ParameterDefinitionList.h b/AnnService/inc/Core/BKT/ParameterDefinitionList.h index 237b9da4..15c0cc01 100644 --- a/AnnService/inc/Core/BKT/ParameterDefinitionList.h +++ b/AnnService/inc/Core/BKT/ParameterDefinitionList.h @@ -5,25 +5,29 @@ DefineBKTParameter(m_sBKTFilename, std::string, std::string("tree.bin"), "TreeFi DefineBKTParameter(m_sGraphFilename, std::string, std::string("graph.bin"), 
"GraphFilePath") DefineBKTParameter(m_sDataPointsFilename, std::string, std::string("vectors.bin"), "VectorFilePath") -DefineBKTParameter(m_iBKTNumber, int, 1L, "BKTNumber") -DefineBKTParameter(m_iBKTKmeansK, int, 32L, "BKTKmeansK") -DefineBKTParameter(m_iNeighborhoodSize, int, 32L, "NeighborhoodSize") -DefineBKTParameter(m_iBKTLeafSize, int, 8L, "BKTLeafSize") -DefineBKTParameter(m_iSamples, int, 1000L, "Samples") -DefineBKTParameter(m_iTptreeNumber, int, 32L, "TpTreeNumber") -DefineBKTParameter(m_iTPTLeafSize, int, 2000L, "TPTLeafSize") -DefineBKTParameter(m_numTopDimensionTpTreeSplit, int, 5L, "NumTopDimensionTpTreeSplit") -DefineBKTParameter(m_iCEF, int, 1000L, "CEF") -DefineBKTParameter(m_iMaxCheckForRefineGraph, int, 10000L, "MaxCheckForRefineGraph") -DefineBKTParameter(m_iMaxCheck, int, 8192L, "MaxCheck") -DefineBKTParameter(m_iNumberOfThreads, int, 1L, "NumberOfThreads") +DefineBKTParameter(m_pTrees.m_iTreeNumber, int, 1L, "BKTNumber") +DefineBKTParameter(m_pTrees.m_iBKTKmeansK, int, 32L, "BKTKmeansK") +DefineBKTParameter(m_pTrees.m_iBKTLeafSize, int, 8L, "BKTLeafSize") +DefineBKTParameter(m_pTrees.m_iSamples, int, 1000L, "Samples") + + +DefineBKTParameter(m_pGraph.m_iTPTNumber, int, 32L, "TpTreeNumber") +DefineBKTParameter(m_pGraph.m_iTPTLeafSize, int, 2000L, "TPTLeafSize") +DefineBKTParameter(m_pGraph.m_numTopDimensionTPTSplit, int, 5L, "NumTopDimensionTpTreeSplit") -DefineBKTParameter(g_iThresholdOfNumberOfContinuousNoBetterPropagation, int, 3L, "ThresholdOfNumberOfContinuousNoBetterPropagation") -DefineBKTParameter(g_iNumberOfInitialDynamicPivots, int, 50L, "NumberOfInitialDynamicPivots") -DefineBKTParameter(g_iNumberOfOtherDynamicPivots, int, 4L, "NumberOfOtherDynamicPivots") +DefineBKTParameter(m_pGraph.m_iNeighborhoodSize, int, 32L, "NeighborhoodSize") +DefineBKTParameter(m_pGraph.m_iNeighborhoodScale, int, 16L, "GraphNeighborhoodScale") +DefineBKTParameter(m_pGraph.m_iCEFScale, int, 4L, "GraphCEFScale") +DefineBKTParameter(m_pGraph.m_iRefineIter, 
int, 0L, "RefineIterations") +DefineBKTParameter(m_pGraph.m_iCEF, int, 1000L, "CEF") +DefineBKTParameter(m_pGraph.m_iMaxCheckForRefineGraph, int, 10000L, "MaxCheckForRefineGraph") +DefineBKTParameter(m_iNumberOfThreads, int, 1L, "NumberOfThreads") DefineBKTParameter(m_iDistCalcMethod, SPTAG::DistCalcMethod, SPTAG::DistCalcMethod::Cosine, "DistCalcMethod") -DefineBKTParameter(m_iRefineIter, int, 0L, "RefineIterations") -DefineBKTParameter(m_iDebugLoad, int, -1, "NumTrains") -DefineBKTParameter(m_iCacheSize, int, -1, "CacheSize") + +DefineBKTParameter(m_iMaxCheck, int, 8192L, "MaxCheck") +DefineBKTParameter(m_iThresholdOfNumberOfContinuousNoBetterPropagation, int, 3L, "ThresholdOfNumberOfContinuousNoBetterPropagation") +DefineBKTParameter(m_iNumberOfInitialDynamicPivots, int, 50L, "NumberOfInitialDynamicPivots") +DefineBKTParameter(m_iNumberOfOtherDynamicPivots, int, 4L, "NumberOfOtherDynamicPivots") + #endif diff --git a/AnnService/inc/Core/Common/BKTree.h b/AnnService/inc/Core/Common/BKTree.h new file mode 100644 index 00000000..70140621 --- /dev/null +++ b/AnnService/inc/Core/Common/BKTree.h @@ -0,0 +1,461 @@ +#ifndef _SPTAG_COMMON_BKTREE_H_ +#define _SPTAG_COMMON_BKTREE_H_ + +#include +#include +#include +#include + +#include "../VectorIndex.h" + +#include "CommonUtils.h" +#include "QueryResultSet.h" +#include "WorkSpace.h" + +#pragma warning(disable:4996) // 'fopen': This function or variable may be unsafe. Consider using fopen_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. 
+ +namespace SPTAG +{ + namespace COMMON + { + // node type for storing BKT + struct BKTNode + { + int centerid; + int childStart; + int childEnd; + + BKTNode(int cid = -1) : centerid(cid), childStart(-1), childEnd(-1) {} + }; + + template + struct KmeansArgs { + int _K; + int _D; + int _T; + T* centers; + int* counts; + float* newCenters; + int* newCounts; + char* label; + int* clusterIdx; + float* clusterDist; + T* newTCenters; + + KmeansArgs(int k, int dim, int datasize, int threadnum) : _K(k), _D(dim), _T(threadnum) { + centers = new T[k * dim]; + counts = new int[k]; + newCenters = new float[threadnum * k * dim]; + newCounts = new int[threadnum * k]; + label = new char[datasize]; + clusterIdx = new int[threadnum * k]; + clusterDist = new float[threadnum * k]; + newTCenters = new T[k * dim]; + } + + ~KmeansArgs() { + delete[] centers; + delete[] counts; + delete[] newCenters; + delete[] newCounts; + delete[] label; + delete[] clusterIdx; + delete[] clusterDist; + delete[] newTCenters; + } + + inline void ClearCounts() { + memset(newCounts, 0, sizeof(int) * _T * _K); + } + + inline void ClearCenters() { + memset(newCenters, 0, sizeof(float) * _T * _K * _D); + } + + inline void ClearDists(float dist) { + for (int i = 0; i < _T * _K; i++) { + clusterIdx[i] = -1; + clusterDist[i] = dist; + } + } + + void Shuffle(std::vector& indices, int first, int last) { + int* pos = new int[_K]; + pos[0] = first; + for (int k = 1; k < _K; k++) pos[k] = pos[k - 1] + newCounts[k - 1]; + + for (int k = 0; k < _K; k++) { + if (newCounts[k] == 0) continue; + int i = pos[k]; + while (newCounts[k] > 0) { + int swapid = pos[(int)(label[i])] + newCounts[(int)(label[i])] - 1; + newCounts[(int)(label[i])]--; + std::swap(indices[i], indices[swapid]); + std::swap(label[i], label[swapid]); + } + while (indices[i] != clusterIdx[k]) i++; + std::swap(indices[i], indices[pos[k] + counts[k] - 1]); + } + delete[] pos; + } + }; + + class BKTree + { + public: + BKTree(): m_iTreeNumber(1), 
m_iBKTKmeansK(32), m_iBKTLeafSize(8), m_iSamples(1000) {} + + BKTree(BKTree& other): m_iTreeNumber(other.m_iTreeNumber), + m_iBKTKmeansK(other.m_iBKTKmeansK), + m_iBKTLeafSize(other.m_iBKTLeafSize), + m_iSamples(other.m_iSamples) {} + ~BKTree() {} + + inline const BKTNode& operator[](int index) const { return m_pTreeRoots[index]; } + inline BKTNode& operator[](int index) { return m_pTreeRoots[index]; } + + inline int size() const { return (int)m_pTreeRoots.size(); } + + inline const std::unordered_map& GetSampleMap() const { return m_pSampleCenterMap; } + + template + void BuildTrees(VectorIndex* index, std::vector* indices = nullptr) + { + struct BKTStackItem { + int index, first, last; + BKTStackItem(int index_, int first_, int last_) : index(index_), first(first_), last(last_) {} + }; + std::stack ss; + + std::vector localindices; + if (indices == nullptr) { + localindices.resize(index->GetNumSamples()); + for (int i = 0; i < index->GetNumSamples(); i++) localindices[i] = i; + } + else { + localindices.assign(indices->begin(), indices->end()); + } + KmeansArgs args(m_iBKTKmeansK, index->GetFeatureDim(), (int)localindices.size(), omp_get_num_threads()); + + m_pSampleCenterMap.clear(); + for (char i = 0; i < m_iTreeNumber; i++) + { + std::random_shuffle(localindices.begin(), localindices.end()); + + m_pTreeStart.push_back((int)m_pTreeRoots.size()); + m_pTreeRoots.push_back(BKTNode((int)localindices.size())); + std::cout << "Start to build BKTree " << i + 1 << std::endl; + + ss.push(BKTStackItem(m_pTreeStart[i], 0, (int)localindices.size())); + while (!ss.empty()) { + BKTStackItem item = ss.top(); ss.pop(); + int newBKTid = (int)m_pTreeRoots.size(); + m_pTreeRoots[item.index].childStart = newBKTid; + if (item.last - item.first <= m_iBKTLeafSize) { + for (int j = item.first; j < item.last; j++) { + m_pTreeRoots.push_back(BKTNode(localindices[j])); + } + } + else { // clustering the data into BKTKmeansK clusters + int numClusters = KmeansClustering(index, 
localindices, item.first, item.last, args); + if (numClusters <= 1) { + int end = min(item.last + 1, (int)localindices.size()); + std::sort(localindices.begin() + item.first, localindices.begin() + end); + m_pTreeRoots[item.index].centerid = localindices[item.first]; + m_pTreeRoots[item.index].childStart = -m_pTreeRoots[item.index].childStart; + for (int j = item.first + 1; j < end; j++) { + m_pTreeRoots.push_back(BKTNode(localindices[j])); + m_pSampleCenterMap[localindices[j]] = m_pTreeRoots[item.index].centerid; + } + m_pSampleCenterMap[-1 - m_pTreeRoots[item.index].centerid] = item.index; + } + else { + for (int k = 0; k < m_iBKTKmeansK; k++) { + if (args.counts[k] == 0) continue; + m_pTreeRoots.push_back(BKTNode(localindices[item.first + args.counts[k] - 1])); + if (args.counts[k] > 1) ss.push(BKTStackItem(newBKTid++, item.first, item.first + args.counts[k] - 1)); + item.first += args.counts[k]; + } + } + } + m_pTreeRoots[item.index].childEnd = (int)m_pTreeRoots.size(); + } + std::cout << i + 1 << " BKTree built, " << m_pTreeRoots.size() - m_pTreeStart[i] << " " << localindices.size() << std::endl; + } + } + + bool SaveTrees(std::string sTreeFileName) const + { + std::cout << "Save BKT to " << sTreeFileName << std::endl; + FILE *fp = fopen(sTreeFileName.c_str(), "wb"); + if (fp == NULL) return false; + + fwrite(&m_iTreeNumber, sizeof(int), 1, fp); + fwrite(m_pTreeStart.data(), sizeof(int), m_iTreeNumber, fp); + int treeNodeSize = (int)m_pTreeRoots.size(); + fwrite(&treeNodeSize, sizeof(int), 1, fp); + fwrite(m_pTreeRoots.data(), sizeof(BKTNode), treeNodeSize, fp); + fclose(fp); + std::cout << "Save BKT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" 
<< std::endl; + return true; + } + + bool LoadTrees(char* pBKTMemFile) + { + m_iTreeNumber = *((int*)pBKTMemFile); + pBKTMemFile += sizeof(int); + m_pTreeStart.resize(m_iTreeNumber); + memcpy(m_pTreeStart.data(), pBKTMemFile, sizeof(int) * m_iTreeNumber); + pBKTMemFile += sizeof(int)*m_iTreeNumber; + + int treeNodeSize = *((int*)pBKTMemFile); + pBKTMemFile += sizeof(int); + m_pTreeRoots.resize(treeNodeSize); + memcpy(m_pTreeRoots.data(), pBKTMemFile, sizeof(BKTNode) * treeNodeSize); + return true; + } + + bool LoadTrees(std::string sTreeFileName) + { + std::cout << "Load BKT From " << sTreeFileName << std::endl; + FILE *fp = fopen(sTreeFileName.c_str(), "rb"); + if (fp == NULL) return false; + + fread(&m_iTreeNumber, sizeof(int), 1, fp); + m_pTreeStart.resize(m_iTreeNumber); + fread(m_pTreeStart.data(), sizeof(int), m_iTreeNumber, fp); + + int treeNodeSize; + fread(&treeNodeSize, sizeof(int), 1, fp); + m_pTreeRoots.resize(treeNodeSize); + fread(m_pTreeRoots.data(), sizeof(BKTNode), treeNodeSize, fp); + fclose(fp); + std::cout << "Load BKT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" 
<< std::endl; + return true; + } + + template + void InitSearchTrees(const VectorIndex* p_index, const COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space) const + { + for (char i = 0; i < m_iTreeNumber; i++) { + const BKTNode& node = m_pTreeRoots[m_pTreeStart[i]]; + if (node.childStart < 0) { + p_space.m_SPTQueue.insert(COMMON::HeapCell(m_pTreeStart[i], p_index->ComputeDistance((const void*)p_query.GetTarget(), p_index->GetSample(node.centerid)))); + } + else { + for (int begin = node.childStart; begin < node.childEnd; begin++) { + int index = m_pTreeRoots[begin].centerid; + p_space.m_SPTQueue.insert(COMMON::HeapCell(begin, p_index->ComputeDistance((const void*)p_query.GetTarget(), p_index->GetSample(index)))); + } + } + } + } + + template + void SearchTrees(const VectorIndex* p_index, const COMMON::QueryResultSet &p_query, + COMMON::WorkSpace &p_space, const int p_limits) const + { + do + { + COMMON::HeapCell bcell = p_space.m_SPTQueue.pop(); + const BKTNode& tnode = m_pTreeRoots[bcell.node]; + if (tnode.childStart < 0) { + if (!p_space.CheckAndSet(tnode.centerid)) { + p_space.m_iNumberOfCheckedLeaves++; + p_space.m_NGQueue.insert(COMMON::HeapCell(tnode.centerid, bcell.distance)); + } + if (p_space.m_iNumberOfCheckedLeaves >= p_limits) break; + } + else { + if (!p_space.CheckAndSet(tnode.centerid)) { + p_space.m_NGQueue.insert(COMMON::HeapCell(tnode.centerid, bcell.distance)); + } + for (int begin = tnode.childStart; begin < tnode.childEnd; begin++) { + int index = m_pTreeRoots[begin].centerid; + p_space.m_SPTQueue.insert(COMMON::HeapCell(begin, p_index->ComputeDistance((const void*)p_query.GetTarget(), p_index->GetSample(index)))); + } + } + } while (!p_space.m_SPTQueue.empty()); + } + + private: + + template + float KmeansAssign(VectorIndex* p_index, + std::vector& indices, + const int first, const int last, KmeansArgs& args, const bool updateCenters) const { + float currDist = 0; + int threads = omp_get_num_threads(); + float lambda = (updateCenters) ? 
COMMON::Utils::GetBase() * COMMON::Utils::GetBase() / (100.0f * (last - first)) : 0.0f; + int subsize = (last - first - 1) / threads + 1; + +#pragma omp parallel for + for (int tid = 0; tid < threads; tid++) + { + int istart = first + tid * subsize; + int iend = min(first + (tid + 1) * subsize, last); + int *inewCounts = args.newCounts + tid * m_iBKTKmeansK; + float *inewCenters = args.newCenters + tid * m_iBKTKmeansK * p_index->GetFeatureDim(); + int * iclusterIdx = args.clusterIdx + tid * m_iBKTKmeansK; + float * iclusterDist = args.clusterDist + tid * m_iBKTKmeansK; + float idist = 0; + for (int i = istart; i < iend; i++) { + int clusterid = 0; + float smallestDist = MaxDist; + for (int k = 0; k < m_iBKTKmeansK; k++) { + float dist = p_index->ComputeDistance(p_index->GetSample(indices[i]), (const void*)(args.centers + k*p_index->GetFeatureDim())) + lambda*args.counts[k]; + if (dist > -MaxDist && dist < smallestDist) { + clusterid = k; smallestDist = dist; + } + } + args.label[i] = clusterid; + inewCounts[clusterid]++; + idist += smallestDist; + if (updateCenters) { + const T* v = (const T*)p_index->GetSample(indices[i]); + float* center = inewCenters + clusterid*p_index->GetFeatureDim(); + for (int j = 0; j < p_index->GetFeatureDim(); j++) center[j] += v[j]; + if (smallestDist > iclusterDist[clusterid]) { + iclusterDist[clusterid] = smallestDist; + iclusterIdx[clusterid] = indices[i]; + } + } + else { + if (smallestDist <= iclusterDist[clusterid]) { + iclusterDist[clusterid] = smallestDist; + iclusterIdx[clusterid] = indices[i]; + } + } + } + COMMON::Utils::atomic_float_add(&currDist, idist); + } + + for (int i = 1; i < threads; i++) { + for (int k = 0; k < m_iBKTKmeansK; k++) + args.newCounts[k] += args.newCounts[i*m_iBKTKmeansK + k]; + } + + if (updateCenters) { + for (int i = 1; i < threads; i++) { + float* currCenter = args.newCenters + i*m_iBKTKmeansK*p_index->GetFeatureDim(); + for (int j = 0; j < m_iBKTKmeansK * p_index->GetFeatureDim(); j++) 
args.newCenters[j] += currCenter[j]; + } + + int maxcluster = 0; + for (int k = 1; k < m_iBKTKmeansK; k++) if (args.newCounts[maxcluster] < args.newCounts[k]) maxcluster = k; + + int maxid = maxcluster; + for (int tid = 1; tid < threads; tid++) { + if (args.clusterDist[maxid] < args.clusterDist[tid * m_iBKTKmeansK + maxcluster]) maxid = tid * m_iBKTKmeansK + maxcluster; + } + if (args.clusterIdx[maxid] < 0 || args.clusterIdx[maxid] >= p_index->GetNumSamples()) + std::cout << "first:" << first << " last:" << last << " maxcluster:" << maxcluster << "(" << args.newCounts[maxcluster] << ") Error maxid:" << maxid << " dist:" << args.clusterDist[maxid] << std::endl; + maxid = args.clusterIdx[maxid]; + + for (int k = 0; k < m_iBKTKmeansK; k++) { + T* TCenter = args.newTCenters + k * p_index->GetFeatureDim(); + if (args.newCounts[k] == 0) { + //int nextid = Utils::rand_int(last, first); + //while (args.label[nextid] != maxcluster) nextid = Utils::rand_int(last, first); + int nextid = maxid; + std::memcpy(TCenter, p_index->GetSample(nextid), sizeof(T)*p_index->GetFeatureDim()); + } + else { + float* currCenters = args.newCenters + k * p_index->GetFeatureDim(); + for (int j = 0; j < p_index->GetFeatureDim(); j++) currCenters[j] /= args.newCounts[k]; + + if (p_index->GetDistCalcMethod() == DistCalcMethod::Cosine) { + COMMON::Utils::Normalize(currCenters, p_index->GetFeatureDim(), COMMON::Utils::GetBase()); + } + for (int j = 0; j < p_index->GetFeatureDim(); j++) TCenter[j] = (T)(currCenters[j]); + } + } + } + else { + for (int i = 1; i < threads; i++) { + for (int k = 0; k < m_iBKTKmeansK; k++) { + if (args.clusterIdx[i*m_iBKTKmeansK + k] != -1 && args.clusterDist[i*m_iBKTKmeansK + k] <= args.clusterDist[k]) { + args.clusterDist[k] = args.clusterDist[i*m_iBKTKmeansK + k]; + args.clusterIdx[k] = args.clusterIdx[i*m_iBKTKmeansK + k]; + } + } + } + } + return currDist; + } + + template + int KmeansClustering(VectorIndex* p_index, + std::vector& indices, const int first, const 
int last, KmeansArgs& args) const { + int iterLimit = 100; + + int batchEnd = min(first + m_iSamples, last); + float currDiff, currDist, minClusterDist = MaxDist; + for (int numKmeans = 0; numKmeans < 3; numKmeans++) { + for (int k = 0; k < m_iBKTKmeansK; k++) { + int randid = COMMON::Utils::rand_int(last, first); + std::memcpy(args.centers + k*p_index->GetFeatureDim(), p_index->GetSample(indices[randid]), sizeof(T)*p_index->GetFeatureDim()); + } + args.ClearCounts(); + currDist = KmeansAssign(p_index, indices, first, batchEnd, args, false); + if (currDist < minClusterDist) { + minClusterDist = currDist; + memcpy(args.newTCenters, args.centers, sizeof(T)*m_iBKTKmeansK*p_index->GetFeatureDim()); + memcpy(args.counts, args.newCounts, sizeof(int) * m_iBKTKmeansK); + } + } + + minClusterDist = MaxDist; + int noImprovement = 0; + for (int iter = 0; iter < iterLimit; iter++) { + std::memcpy(args.centers, args.newTCenters, sizeof(T)*m_iBKTKmeansK*p_index->GetFeatureDim()); + std::random_shuffle(indices.begin() + first, indices.begin() + last); + + args.ClearCenters(); + args.ClearCounts(); + args.ClearDists(-MaxDist); + currDist = KmeansAssign(p_index, indices, first, batchEnd, args, true); + memcpy(args.counts, args.newCounts, sizeof(int)*m_iBKTKmeansK); + + currDiff = 0; + for (int k = 0; k < m_iBKTKmeansK; k++) { + currDiff += p_index->ComputeDistance((const void*)(args.centers + k*p_index->GetFeatureDim()), (const void*)(args.newTCenters + k*p_index->GetFeatureDim())); + } + + if (currDist < minClusterDist) { + noImprovement = 0; + minClusterDist = currDist; + } + else { + noImprovement++; + } + if (currDiff < 1e-3 || noImprovement >= 5) break; + } + + args.ClearCounts(); + args.ClearDists(MaxDist); + currDist = KmeansAssign(p_index, indices, first, last, args, false); + memcpy(args.counts, args.newCounts, sizeof(int)*m_iBKTKmeansK); + + int numClusters = 0; + for (int i = 0; i < m_iBKTKmeansK; i++) if (args.counts[i] > 0) numClusters++; + + if (numClusters <= 1) { + 
//if (last - first > 1) std::cout << "large cluster:" << last - first << " dist:" << currDist << std::endl; + return numClusters; + } + args.Shuffle(indices, first, last); + return numClusters; + } + + private: + std::vector m_pTreeStart; + std::vector m_pTreeRoots; + std::unordered_map m_pSampleCenterMap; + + public: + int m_iTreeNumber, m_iBKTKmeansK, m_iBKTLeafSize, m_iSamples; + }; + } +} +#endif diff --git a/AnnService/inc/Core/Common/Dataset.h b/AnnService/inc/Core/Common/Dataset.h index 4753b088..97b399fc 100644 --- a/AnnService/inc/Core/Common/Dataset.h +++ b/AnnService/inc/Core/Common/Dataset.h @@ -8,6 +8,8 @@ #define aligned_malloc(a, b) _mm_malloc(a, b) #define aligned_free(a) _mm_free(a) +#pragma warning(disable:4996) // 'fopen': This function or variable may be unsafe. Consider using fopen_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. + namespace SPTAG { namespace COMMON @@ -21,86 +23,158 @@ namespace SPTAG int cols; bool ownData = false; T* data = nullptr; - std::vector* dataIncremental = nullptr; + std::vector dataIncremental; public: - Dataset() {} - Dataset(int rows_, int cols_, T* data_ = nullptr) + Dataset(): rows(0), cols(1) {} + Dataset(int rows_, int cols_, T* data_ = nullptr, bool transferOnwership_ = true) { - Initialize(rows_, cols_, data_); + Initialize(rows_, cols_, data_, transferOnwership_); } ~Dataset() { if (ownData) aligned_free(data); - if (dataIncremental) { - dataIncremental->clear(); - delete dataIncremental; - } } - void Initialize(int rows_, int cols_, T* data_ = nullptr) + void Initialize(int rows_, int cols_, T* data_ = nullptr, bool transferOnwership_ = true) { rows = rows_; cols = cols_; data = data_; - if (data == nullptr) + if (data_ == nullptr || !transferOnwership_) { ownData = true; data = (T*)aligned_malloc(sizeof(T) * rows * cols, ALIGN); + if (data_ != nullptr) memcpy(data, data_, rows * cols * sizeof(T)); + else std::memset(data, -1, rows * cols * sizeof(T)); } - 
dataIncremental = new std::vector(); } void SetR(int R_) { if (R_ >= rows) - dataIncremental->resize((R_ - rows) * cols); + dataIncremental.resize((R_ - rows) * cols); else { rows = R_; - dataIncremental->clear(); + dataIncremental.clear(); } } - int R() const { return (int)(rows + dataIncremental->size() / cols); } - int C() const { return cols; } + inline int R() const { return (int)(rows + dataIncremental.size() / cols); } + inline int C() const { return cols; } T* operator[](int index) { if (index >= rows) { - return dataIncremental->data() + (size_t)(index - rows)*cols; + return dataIncremental.data() + (size_t)(index - rows)*cols; } return data + (size_t)index*cols; } + const T* operator[](int index) const { if (index >= rows) { - return dataIncremental->data() + (size_t)(index - rows)*cols; + return dataIncremental.data() + (size_t)(index - rows)*cols; } return data + (size_t)index*cols; } - T* GetData() + void AddBatch(const T* pData, int num) + { + dataIncremental.insert(dataIncremental.end(), pData, pData + num*cols); + } + + void AddBatch(int num) { - return data; + dataIncremental.insert(dataIncremental.end(), (size_t)num*cols, T(-1)); } - void reset() + bool Save(std::string sDataPointsFileName) { - if (ownData) { - aligned_free(data); - ownData = false; + std::cout << "Save Data To " << sDataPointsFileName << std::endl; + FILE * fp = fopen(sDataPointsFileName.c_str(), "wb"); + if (fp == NULL) return false; + + int CR = R(); + fwrite(&CR, sizeof(int), 1, fp); + fwrite(&cols, sizeof(int), 1, fp); + + T* ptr = data; + int toWrite = rows; + while (toWrite > 0) + { + size_t write = fwrite(ptr, sizeof(T) * cols, toWrite, fp); + ptr += write * cols; + toWrite -= (int)write; } - if (dataIncremental) { - dataIncremental->clear(); - delete dataIncremental; + ptr = dataIncremental.data(); + toWrite = CR - rows; + while (toWrite > 0) + { + size_t write = fwrite(ptr, sizeof(T) * cols, toWrite, fp); + ptr += write * cols; + toWrite -= (int)write; } + fclose(fp); + 
+ std::cout << "Save Data (" << CR << ", " << cols << ") Finish!" << std::endl; + return true; } - - void AddBatch(const T* pData, int num) + + bool Load(std::string sDataPointsFileName) { - dataIncremental->insert(dataIncremental->end(), pData, pData + num*cols); + std::cout << "Load Data From " << sDataPointsFileName << std::endl; + FILE * fp = fopen(sDataPointsFileName.c_str(), "rb"); + if (fp == NULL) return false; + + int R, C; + fread(&R, sizeof(int), 1, fp); + fread(&C, sizeof(int), 1, fp); + + Initialize(R, C); + T* ptr = data; + while (R > 0) { + size_t read = fread(ptr, sizeof(T) * C, R, fp); + ptr += read * C; + R -= (int)read; + } + fclose(fp); + std::cout << "Load Data (" << rows << ", " << cols << ") Finish!" << std::endl; + return true; } - void AddBatch(int num) + // Functions for loading models from memory mapped files + bool Load(char* pDataPointsMemFile) + { + int R, C; + R = *((int*)pDataPointsMemFile); + pDataPointsMemFile += sizeof(int); + + C = *((int*)pDataPointsMemFile); + pDataPointsMemFile += sizeof(int); + + Initialize(R, C, (T*)pDataPointsMemFile); + return true; + } + + bool Refine(const std::vector& indices, std::string sDataPointsFileName) { - dataIncremental->insert(dataIncremental->end(), (size_t)num*cols, T(-1)); + std::cout << "Save Refine Data To " << sDataPointsFileName << std::endl; + FILE * fp = fopen(sDataPointsFileName.c_str(), "wb"); + if (fp == NULL) return false; + + int R = (int)(indices.size()); + fwrite(&R, sizeof(int), 1, fp); + fwrite(&cols, sizeof(int), 1, fp); + + // write point one by one in case for cache miss + for (int i = 0; i < R; i++) { + if (indices[i] < rows) + fwrite(data + (size_t)indices[i] * cols, sizeof(T) * cols, 1, fp); + else + fwrite(dataIncremental.data() + (size_t)(indices[i] - rows) * cols, sizeof(T) * cols, 1, fp); + } + fclose(fp); + + std::cout << "Save Refine Data (" << R << ", " << cols << ") Finish!" 
<< std::endl; + return true; } }; }
diff --git a/AnnService/inc/Core/Common/KDTree.h b/AnnService/inc/Core/Common/KDTree.h
new file mode 100644
index 00000000..ab2e1779
--- /dev/null
+++ b/AnnService/inc/Core/Common/KDTree.h
@@ -0,0 +1,327 @@
+#ifndef _SPTAG_COMMON_KDTREE_H_
+#define _SPTAG_COMMON_KDTREE_H_
+
+#include <iostream> // NOTE(review): the three system header names were lost in extraction; restored from usage (std::cout, std::vector, std::string) - confirm
+#include <vector>
+#include <string>
+
+#include "../VectorIndex.h"
+
+#include "CommonUtils.h"
+#include "QueryResultSet.h"
+#include "WorkSpace.h"
+
+#pragma warning(disable:4996) // 'fopen': This function or variable may be unsafe. Consider using fopen_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details.
+
+namespace SPTAG
+{
+    namespace COMMON
+    {
+        // node type for storing KDT
+        struct KDTNode
+        {
+            int left;          // >= 0: index of the left child in the node array; < 0: leaf holding sample -(left) - 1
+            int right;         // same encoding as left
+            short split_dim;   // dimension compared at this node
+            float split_value; // threshold: values below it go to the left child
+        };
+        // A set of KD-trees stored in one flat node array; tree i starts at m_pTreeStart[i].
+        class KDTree
+        {
+        public:
+            KDTree() : m_iTreeNumber(2), m_numTopDimensionKDTSplit(5), m_iSamples(1000) {}
+            // Copies only the build parameters, not the nodes. Takes a const reference so const sources can be copied too.
+            KDTree(const KDTree& other) : m_iTreeNumber(other.m_iTreeNumber),
+                m_numTopDimensionKDTSplit(other.m_numTopDimensionKDTSplit),
+                m_iSamples(other.m_iSamples) {}
+            ~KDTree() {}
+            // Unchecked access to one slot of the flat node array.
+            inline const KDTNode& operator[](int index) const { return m_pTreeRoots[index]; }
+            inline KDTNode& operator[](int index) { return m_pTreeRoots[index]; }
+            // Number of node slots allocated over all trees.
+            inline int size() const { return (int)m_pTreeRoots.size(); }
+            // Builds m_iTreeNumber trees over the given sample ids (every sample of p_index when indices is null).
+            template <typename T>
+            void BuildTrees(VectorIndex* p_index, std::vector<int>* indices = nullptr)
+            {
+                std::vector<int> localindices;
+                if (indices == nullptr) {
+                    localindices.resize(p_index->GetNumSamples());
+                    for (int i = 0; i < p_index->GetNumSamples(); i++) localindices[i] = i;
+                }
+                else {
+                    localindices.assign(indices->begin(), indices->end());
+                }
+                // one slice of localindices.size() node slots per tree
+                m_pTreeRoots.resize(m_iTreeNumber * localindices.size());
+                m_pTreeStart.resize(m_iTreeNumber, 0);
+#pragma omp parallel for
+                for (int i = 0; i < m_iTreeNumber; i++)
+                {
+                    Sleep(i * 100); std::srand(clock());
+                    // every tree works on its own shuffled copy of the ids
+                    std::vector<int> pindices(localindices.begin(), localindices.end());
+                    std::random_shuffle(pindices.begin(), pindices.end());
+
+                    m_pTreeStart[i] = i * (int)pindices.size();
+                    std::cout << "Start to build KDTree " << i + 1 << std::endl;
+                    int iTreeSize = m_pTreeStart[i];
+                    DivideTree<T>(p_index, pindices, 0, (int)pindices.size() - 1, m_pTreeStart[i], iTreeSize);
+                    std::cout << i + 1 << " KDTree built, " << iTreeSize - m_pTreeStart[i] << " " << pindices.size() << std::endl;
+                }
+            }
+            // Writes tree count, tree start offsets, node count and the raw nodes; returns false only when the file cannot be opened.
+            bool SaveTrees(std::string sTreeFileName) const
+            {
+                std::cout << "Save KDT to " << sTreeFileName << std::endl;
+                FILE *fp = fopen(sTreeFileName.c_str(), "wb");
+                if (fp == NULL) return false;
+
+                fwrite(&m_iTreeNumber, sizeof(int), 1, fp);
+                fwrite(m_pTreeStart.data(), sizeof(int), m_iTreeNumber, fp);
+                int treeNodeSize = (int)m_pTreeRoots.size();
+                fwrite(&treeNodeSize, sizeof(int), 1, fp);
+                fwrite(m_pTreeRoots.data(), sizeof(KDTNode), treeNodeSize, fp);
+                fclose(fp);
+                std::cout << "Save KDT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" << std::endl;
+                return true;
+            }
+            // Loads the trees from an in-memory image in the layout SaveTrees writes; the buffer is trusted, no bounds are checked.
+            bool LoadTrees(char* pKDTMemFile)
+            {
+                m_iTreeNumber = *((int*)pKDTMemFile);
+                pKDTMemFile += sizeof(int);
+                m_pTreeStart.resize(m_iTreeNumber);
+                memcpy(m_pTreeStart.data(), pKDTMemFile, sizeof(int) * m_iTreeNumber);
+                pKDTMemFile += sizeof(int)*m_iTreeNumber;
+
+                int treeNodeSize = *((int*)pKDTMemFile);
+                pKDTMemFile += sizeof(int);
+                m_pTreeRoots.resize(treeNodeSize);
+                memcpy(m_pTreeRoots.data(), pKDTMemFile, sizeof(KDTNode) * treeNodeSize);
+                return true;
+            }
+            // Loads the trees from a file written by SaveTrees; returns false if the file cannot be opened or is truncated/corrupt.
+            bool LoadTrees(std::string sTreeFileName)
+            {
+                std::cout << "Load KDT From " << sTreeFileName << std::endl;
+                FILE *fp = fopen(sTreeFileName.c_str(), "rb");
+                if (fp == NULL) return false;
+                // every read is checked: a short read used to leave the sizes unset and still report success
+                if (fread(&m_iTreeNumber, sizeof(int), 1, fp) != 1 || m_iTreeNumber < 0) { fclose(fp); return false; }
+                m_pTreeStart.resize(m_iTreeNumber);
+                if (fread(m_pTreeStart.data(), sizeof(int), m_iTreeNumber, fp) != (size_t)m_iTreeNumber) { fclose(fp); return false; }
+
+                int treeNodeSize = 0;
+                if (fread(&treeNodeSize, sizeof(int), 1, fp) != 1 || treeNodeSize < 0) { fclose(fp); return false; }
+                m_pTreeRoots.resize(treeNodeSize);
+                if (fread(m_pTreeRoots.data(), sizeof(KDTNode), treeNodeSize, fp) != (size_t)treeNodeSize) { fclose(fp); return false; }
+                fclose(fp);
+                std::cout << "Load KDT (" << m_iTreeNumber << "," << treeNodeSize << ") Finish!" << std::endl;
+                return true;
+            }
+            // Seeds the search: descends every tree once, then keeps expanding queued branches while they can still beat the current worst result.
+            template <typename T>
+            void InitSearchTrees(const VectorIndex* p_index, const COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space, const int p_limits) const
+            {
+                for (int i = 0; i < m_iTreeNumber; i++) { // int, not char: a char counter cannot count past 127 trees
+                    KDTSearch<T>(p_index, p_query, p_space, m_pTreeStart[i], true, 0);
+                }
+
+                while (!p_space.m_SPTQueue.empty() && p_space.m_iNumberOfCheckedLeaves < p_limits)
+                {
+                    auto& tcell = p_space.m_SPTQueue.pop();
+                    if (p_query.worstDist() < tcell.distance) break;
+                    KDTSearch<T>(p_index, p_query, p_space, tcell.node, true, tcell.distance);
+                }
+            }
+            // Continues the search: expands queued branches until the queue is empty or p_limits leaves have been checked.
+            template <typename T>
+            void SearchTrees(const VectorIndex* p_index, const COMMON::QueryResultSet<T> &p_query, COMMON::WorkSpace &p_space, const int p_limits) const
+            {
+                while (!p_space.m_SPTQueue.empty() && p_space.m_iNumberOfCheckedLeaves < p_limits)
+                {
+                    auto& tcell = p_space.m_SPTQueue.pop();
+                    KDTSearch<T>(p_index, p_query, p_space, tcell.node, false, tcell.distance);
+                }
+            }
+
+        private:
+            // Descends from node along the nearer side of each split, queueing the farther child with its bound; a leaf is scored once and pushed to m_NGQueue.
+            template <typename T>
+            void KDTSearch(const VectorIndex* p_index, const COMMON::QueryResultSet<T> &p_query,
+                           COMMON::WorkSpace& p_space, const int node, const bool isInit, const float distBound) const {
+                if (node < 0)
+                {
+                    int index = -node - 1;
+                    if (index >= p_index->GetNumSamples()) return;
+                    const char* data = (const char *)(p_index->GetSample(index)); // declared outside the #ifdef: the distance call below needs it in every build
+#ifdef PREFETCH
+                    _mm_prefetch(data, _MM_HINT_T0);
+                    _mm_prefetch(data + 64, _MM_HINT_T0);
+#endif
+                    if (p_space.CheckAndSet(index)) return;
+
+                    ++p_space.m_iNumberOfTreeCheckedLeaves;
+                    ++p_space.m_iNumberOfCheckedLeaves;
+                    p_space.m_NGQueue.insert(COMMON::HeapCell(index, p_index->ComputeDistance((const void*)p_query.GetTarget(), (const void*)data)));
+                    return;
+                }
+
+                auto& tnode = m_pTreeRoots[node];
+
+                float diff = (p_query.GetTarget())[tnode.split_dim] - tnode.split_value;
+                float distanceBound = distBound + diff * diff;
+                int otherChild, bestChild;
+                if (diff < 0)
+                {
+                    bestChild = tnode.left;
+                    otherChild = tnode.right;
+                }
+                else
+                {
+                    otherChild = tnode.left;
+                    bestChild = tnode.right;
+                }
+                // during init, only queue the far side when its bound can still beat the current worst result
+                if (!isInit || distanceBound < p_query.worstDist())
+                {
+                    p_space.m_SPTQueue.insert(COMMON::HeapCell(otherChild, distanceBound));
+                }
+                KDTSearch<T>(p_index, p_query, p_space, bestChild, isInit, distBound);
+            }
+
+            // Recursively builds the subtree for indices[first..last] at slot index; iTreeSize tracks the last slot handed out.
+            template <typename T>
+            void DivideTree(VectorIndex* p_index, std::vector<int>& indices, int first, int last,
+                int index, int &iTreeSize) {
+                ChooseDivision<T>(p_index, m_pTreeRoots[index], indices, first, last);
+                int i = Subdivide<T>(p_index, m_pTreeRoots[index], indices, first, last);
+                if (i - 1 <= first)
+                {
+                    m_pTreeRoots[index].left = -indices[first] - 1;
+                }
+                else
+                {
+                    iTreeSize++;
+                    m_pTreeRoots[index].left = iTreeSize;
+                    DivideTree<T>(p_index, indices, first, i - 1, iTreeSize, iTreeSize);
+                }
+                if (last == i)
+                {
+                    m_pTreeRoots[index].right = -indices[last] - 1;
+                }
+                else
+                {
+                    iTreeSize++;
+                    m_pTreeRoots[index].right = iTreeSize;
+                    DivideTree<T>(p_index, indices, i, last, iTreeSize, iTreeSize);
+                }
+            }
+            // Picks node's split dimension and threshold from the first (up to m_iSamples + 1) points of indices[first..last].
+            template <typename T>
+            void ChooseDivision(VectorIndex* p_index, KDTNode& node, const std::vector<int>& indices, const int first, const int last)
+            {
+                std::vector<float> meanValues(p_index->GetFeatureDim(), 0);
+                std::vector<float> varianceValues(p_index->GetFeatureDim(), 0);
+                int end = min(first + m_iSamples, last);
+                int count = end - first + 1;
+                // calculate the mean of each dimension
+                for (int j = first; j <= end; j++)
+                {
+                    const T* v = (const T*)p_index->GetSample(indices[j]);
+                    for (int k = 0; k < p_index->GetFeatureDim(); k++)
+                    {
+                        meanValues[k] += v[k];
+                    }
+                }
+                for (int k = 0; k < p_index->GetFeatureDim(); k++)
+                {
+                    meanValues[k] /= count;
+                }
+                // calculate the variance of each dimension
+                for (int j = first; j <= end; j++)
+                {
+                    const T* v = (const T*)p_index->GetSample(indices[j]);
+                    for (int k = 0; k < p_index->GetFeatureDim(); k++)
+                    {
+                        float dist = v[k] - meanValues[k];
+                        varianceValues[k] += dist*dist;
+                    }
+                }
+                // choose the split dimension as one of the dimension inside TOP_DIM maximum variance
+                node.split_dim = SelectDivisionDimension(varianceValues);
+                // determine the threshold
+                node.split_value = meanValues[node.split_dim];
+            }
+            // Returns a random dimension among the m_numTopDimensionKDTSplit dimensions with the largest variance.
+            int SelectDivisionDimension(const std::vector<float>& varianceValues) const
+            {
+                // Record the top maximum variances
+                std::vector<int> topind(m_numTopDimensionKDTSplit);
+                int num = 0;
+                // order the variances
+                for (int i = 0; i < (int)varianceValues.size(); i++)
+                {
+                    if (num < m_numTopDimensionKDTSplit || varianceValues[i] > varianceValues[topind[num - 1]])
+                    {
+                        if (num < m_numTopDimensionKDTSplit)
+                        {
+                            topind[num++] = i;
+                        }
+                        else
+                        {
+                            topind[num - 1] = i;
+                        }
+                        int j = num - 1;
+                        // order the TOP_DIM variances
+                        while (j > 0 && varianceValues[topind[j]] > varianceValues[topind[j - 1]])
+                        {
+                            std::swap(topind[j], topind[j - 1]);
+                            j--;
+                        }
+                    }
+                }
+                // randomly choose a dimension from TOP_DIM
+                return topind[COMMON::Utils::rand_int(num)];
+            }
+            // Partitions indices[first..last] in place around node's split; returns the first position of the right part.
+            template <typename T>
+            int Subdivide(VectorIndex* p_index, const KDTNode& node, std::vector<int>& indices, const int first, const int last) const
+            {
+                int i = first;
+                int j = last;
+                // decide which child one point belongs
+                while (i <= j)
+                {
+                    int ind = indices[i];
+                    const T* v = (const T*)p_index->GetSample(ind);
+                    float val = v[node.split_dim];
+                    if (val < node.split_value)
+                    {
+                        i++;
+                    }
+                    else
+                    {
+                        std::swap(indices[i], indices[j]);
+                        j--;
+                    }
+                }
+                // if all the points in the node are equal,equally split the node into 2
+                if ((i == first) || (i == last + 1))
+                {
+                    i = (first + last + 1) / 2;
+                }
+                return i;
+            }
+
+        private:
+            std::vector<int> m_pTreeStart;     // slot of each tree's root inside m_pTreeRoots
+            std::vector<KDTNode> m_pTreeRoots; // flat node storage shared by all trees
+
+        public:
+            int m_iTreeNumber, m_numTopDimensionKDTSplit, m_iSamples;
+        };
+    }
+}
+#endif
diff --git a/AnnService/inc/Core/Common/NeighborhoodGraph.h b/AnnService/inc/Core/Common/NeighborhoodGraph.h new file mode 100644 index 00000000..9e646b69 --- /dev/null +++ b/AnnService/inc/Core/Common/NeighborhoodGraph.h @@ -0,0 +1,408 @@ +#ifndef _SPTAG_COMMON_NG_H_ +#define _SPTAG_COMMON_NG_H_ + +#include "../VectorIndex.h" +
+#include "CommonUtils.h"
+#include "Dataset.h"
+#include "FineGrainedLock.h"
+#include "QueryResultSet.h"
+
+namespace SPTAG
+{
+    namespace COMMON
+    {
+        class NeighborhoodGraph // abstract base: owns the neighbor table and the build/refine/load/save logic; subclasses supply the neighbor-selection rule
+        {
+        public:
+            NeighborhoodGraph(): m_iGraphSize(0), m_iTPTNumber(32), // m_iGraphSize starts at 0 so R() is defined before any build or load
+                m_iTPTLeafSize(2000),
+                m_iSamples(1000),
+                m_numTopDimensionTPTSplit(5),
+                m_iNeighborhoodSize(32),
+                m_iNeighborhoodScale(16),
+                m_iCEFScale(4),
+                m_iRefineIter(0),
+                m_iCEF(1000),
+                m_iMaxCheckForRefineGraph(10000) {}
+            // virtual: the class is a polymorphic base, so deleting a subclass through a base pointer must run the subclass destructor
+            virtual ~NeighborhoodGraph() {}
+            // Offers insertNode (at distance insertDist from node) as a new neighbor of node.
+            virtual void InsertNeighbors(VectorIndex* index, const int node, int insertNode, float insertDist) = 0;
+            // Rewrites the neighbor row nodes of node from the candidates in queryResults.
+            virtual void RebuildNeighbors(VectorIndex* index, const int node, int* nodes, const BasicResult* queryResults, const int numResults) = 0;
+            // Estimates graph quality from samples probe nodes; ids present as keys of idmap are left out of the reference search.
+            virtual float GraphAccuracyEstimation(VectorIndex* index, const int samples, const std::unordered_map<int, int>* idmap = nullptr) = 0;
+            // Builds the graph for every sample of index: candidate neighbors come from m_iTPTNumber random partition trees, then RefineGraph tightens them.
+            template <typename T>
+            void BuildGraph(VectorIndex* index, const std::unordered_map<int, int>* idmap = nullptr)
+            {
+                std::cout << "build RNG graph!" << std::endl;
+
+                m_iGraphSize = index->GetNumSamples();
+                m_iNeighborhoodSize = m_iNeighborhoodSize * m_iNeighborhoodScale; // rows are allocated at the scaled width; RefineGraph divides the size back
+                m_pNeighborhoodGraph.Initialize(m_iGraphSize, m_iNeighborhoodSize);
+                m_dataUpdateLock.resize(m_iGraphSize);
+                // small data sets skip the partition trees and go straight to refinement
+                if (m_iGraphSize < 1000) {
+                    RefineGraph<T>(index, idmap);
+                    std::cout << "Build RNG Graph end!" << std::endl;
+                    return;
+                }
+
+                {
+                    COMMON::Dataset<float> NeighborhoodDists(m_iGraphSize, m_iNeighborhoodSize);
+                    std::vector<std::vector<int>> TptreeDataIndices(m_iTPTNumber, std::vector<int>(m_iGraphSize));
+                    std::vector<std::vector<std::pair<int, int>>> TptreeLeafNodes(m_iTPTNumber, std::vector<std::pair<int, int>>());
+
+                    for (int i = 0; i < m_iGraphSize; i++)
+                        for (int j = 0; j < m_iNeighborhoodSize; j++)
+                            (NeighborhoodDists)[i][j] = MaxDist;
+
+                    std::cout << "Parallel TpTree Partition begin " << std::endl;
+#pragma omp parallel for schedule(dynamic)
+                    for (int i = 0; i < m_iTPTNumber; i++)
+                    {
+                        Sleep(i * 100); std::srand(clock());
+                        for (int j = 0; j < m_iGraphSize; j++) TptreeDataIndices[i][j] = j;
+                        std::random_shuffle(TptreeDataIndices[i].begin(), TptreeDataIndices[i].end());
+                        PartitionByTptree<T>(index, TptreeDataIndices[i], 0, m_iGraphSize - 1, TptreeLeafNodes[i]);
+                        std::cout << "Finish Getting Leaves for Tree " << i << std::endl;
+                    }
+                    std::cout << "Parallel TpTree Partition done" << std::endl;
+                    // inside every leaf, compare all pairs and offer each point to the other's neighbor row
+                    for (int i = 0; i < m_iTPTNumber; i++)
+                    {
+#pragma omp parallel for schedule(dynamic)
+                        for (int j = 0; j < (int)TptreeLeafNodes[i].size(); j++)
+                        {
+                            int start_index = TptreeLeafNodes[i][j].first;
+                            int end_index = TptreeLeafNodes[i][j].second;
+                            if (omp_get_thread_num() == 0) std::cout << "\rProcessing Tree " << i << ' ' << j * 100 / TptreeLeafNodes[i].size() << '%';
+                            for (int x = start_index; x < end_index; x++)
+                            {
+                                for (int y = x + 1; y <= end_index; y++)
+                                {
+                                    int p1 = TptreeDataIndices[i][x];
+                                    int p2 = TptreeDataIndices[i][y];
+                                    float dist = index->ComputeDistance(index->GetSample(p1), index->GetSample(p2));
+                                    if (idmap != nullptr) {
+                                        p1 = (idmap->find(p1) == idmap->end()) ? p1 : idmap->at(p1);
+                                        p2 = (idmap->find(p2) == idmap->end()) ? p2 : idmap->at(p2);
+                                    }
+                                    COMMON::Utils::AddNeighbor(p2, dist, (m_pNeighborhoodGraph)[p1], (NeighborhoodDists)[p1], m_iNeighborhoodSize);
+                                    COMMON::Utils::AddNeighbor(p1, dist, (m_pNeighborhoodGraph)[p2], (NeighborhoodDists)[p2], m_iNeighborhoodSize);
+                                }
+                            }
+                        }
+                        TptreeDataIndices[i].clear();
+                        TptreeLeafNodes[i].clear();
+                        std::cout << std::endl;
+                    }
+                    TptreeDataIndices.clear();
+                    TptreeLeafNodes.clear();
+                }
+                // NOTE(review): when this branch is skipped m_iNeighborhoodSize keeps its scaled value - confirm that is intended
+                if (m_iMaxCheckForRefineGraph > 0) {
+                    RefineGraph<T>(index, idmap);
+                }
+            }
+            // Two refinement passes over every node: first with m_iCEF and the check budget scaled up, then at the final neighborhood size.
+            template <typename T>
+            void RefineGraph(VectorIndex* index, const std::unordered_map<int, int>* idmap = nullptr)
+            {
+                m_iCEF *= m_iCEFScale;
+                m_iMaxCheckForRefineGraph *= m_iCEFScale;
+
+#pragma omp parallel for schedule(dynamic)
+                for (int i = 0; i < m_iGraphSize; i++)
+                {
+                    RefineNode<T>(index, i, false);
+                }
+                std::cout << "Refine RNG, graph acc:" << GraphAccuracyEstimation(index, 100, idmap) << std::endl;
+
+                m_iCEF /= m_iCEFScale;
+                m_iMaxCheckForRefineGraph /= m_iCEFScale;
+                m_iNeighborhoodSize /= m_iNeighborhoodScale;
+
+#pragma omp parallel for schedule(dynamic)
+                for (int i = 0; i < m_iGraphSize; i++)
+                {
+                    RefineNode<T>(index, i, false);
+                }
+                std::cout << "Refine RNG, graph acc:" << GraphAccuracyEstimation(index, 100, idmap) << std::endl;
+                // entries of idmap with a negative key k store -2 - value in the last slot of row -1 - k
+                if (idmap != nullptr) {
+                    for (auto iter = idmap->begin(); iter != idmap->end(); iter++)
+                        if (iter->first < 0)
+                        {
+                            m_pNeighborhoodGraph[-1 - iter->first][m_iNeighborhoodSize - 1] = -2 - iter->second;
+                        }
+                }
+            }
+            // Refines the rows listed in indices, renumbers their neighbors through reverseIndices and writes the compacted graph to graphFileName.
+            template <typename T>
+            ErrorCode RefineGraph(VectorIndex* index, std::vector<int>& indices, std::vector<int>& reverseIndices,
+                std::string graphFileName, const std::unordered_map<int, int>* idmap = nullptr)
+            {
+                int R = (int)indices.size();
+
+#pragma omp parallel for schedule(dynamic)
+                for (int i = 0; i < R; i++)
+                {
+                    RefineNode<T>(index, indices[i], false);
+                    int* nodes = m_pNeighborhoodGraph[indices[i]];
+                    for (int j = 0; j < m_iNeighborhoodSize; j++)
+                    {
+                        if (nodes[j] < 0) nodes[j] = -1;
+                        else nodes[j] = reverseIndices[nodes[j]]; // NOTE(review): rows switch to the new id space while other iterations still run RefineNode -> SearchIndex; if that search walks this graph the remap needs its own pass - confirm
+                    }
+                    if (idmap == nullptr || idmap->find(-1 - indices[i]) == idmap->end()) continue;
+                    nodes[m_iNeighborhoodSize - 1] = -2 - idmap->at(-1 - indices[i]);
+                }
+
+                std::ofstream graphOut(graphFileName, std::ios::binary);
+                if (!graphOut.is_open()) return ErrorCode::FailedCreateFile;
+                graphOut.write((char*)&R, sizeof(int));
+                graphOut.write((char*)&m_iNeighborhoodSize, sizeof(int));
+                for (int i = 0; i < R; i++) {
+                    graphOut.write((char*)m_pNeighborhoodGraph[indices[i]], sizeof(int) * m_iNeighborhoodSize);
+                }
+                graphOut.close();
+                return ErrorCode::Success;
+            }
+
+            // Searches the index for node's m_iCEF + 1 nearest candidates and rebuilds its row; optionally offers node to each candidate's row under that row's lock.
+            template <typename T>
+            void RefineNode(VectorIndex* index, const int node, bool updateNeighbors)
+            {
+                COMMON::QueryResultSet<T> query((const T*)index->GetSample(node), m_iCEF + 1);
+                index->SearchIndex(query);
+                RebuildNeighbors(index, node, m_pNeighborhoodGraph[node], query.GetResults(), m_iCEF + 1);
+
+                if (updateNeighbors) {
+                    // update neighbors
+                    for (int j = 0; j <= m_iCEF; j++)
+                    {
+                        BasicResult* item = query.GetResult(j);
+                        if (item->VID < 0) break;
+                        if (item->VID == node) continue;
+
+                        std::lock_guard lock(m_dataUpdateLock[item->VID]); // NOTE(review): the template argument was lost in extraction; the lock type comes from FineGrainedLock, not visible here - restore it
+                        InsertNeighbors(index, item->VID, node, item->Dist);
+                    }
+                }
+            }
+            // Recursively splits indices[first..last] until a part spans at most m_iTPTLeafSize; each leaf is appended to leaves as an inclusive (first, last) range.
+            template <typename T>
+            void PartitionByTptree(VectorIndex* index, std::vector<int>& indices, const int first, const int last,
+                std::vector<std::pair<int, int>> & leaves)
+            {
+                if (last - first <= m_iTPTLeafSize)
+                {
+                    leaves.push_back(std::make_pair(first, last));
+                }
+                else
+                {
+                    std::vector<float> Mean(index->GetFeatureDim(), 0);
+
+                    int iIteration = 100;
+                    int end = min(first + m_iSamples, last);
+                    int count = end - first + 1;
+                    // calculate the mean of each dimension
+                    for (int j = first; j <= end; j++)
+                    {
+                        const T* v = (const T*)index->GetSample(indices[j]);
+                        for (int k = 0; k < index->GetFeatureDim(); k++)
+                        {
+                            Mean[k] += v[k];
+                        }
+                    }
+                    for (int k = 0; k < index->GetFeatureDim(); k++)
+                    {
+                        Mean[k] /= count;
+                    }
+                    std::vector<BasicResult> Variance;
+                    Variance.reserve(index->GetFeatureDim());
+                    for (int j = 0; j < index->GetFeatureDim(); j++)
+                    {
+                        Variance.push_back(BasicResult(j, 0));
+                    }
+                    // calculate the variance of each dimension
+                    for (int j = first; j <= end; j++)
+                    {
+                        const T* v = (const T*)index->GetSample(indices[j]);
+                        for (int k = 0; k < index->GetFeatureDim(); k++)
+                        {
+                            float dist = v[k] - Mean[k];
+                            Variance[k].Dist += dist*dist;
+                        }
+                    }
+                    std::sort(Variance.begin(), Variance.end(), COMMON::Compare);
+                    std::vector<int> indexs(m_numTopDimensionTPTSplit);
+                    std::vector<float> weight(m_numTopDimensionTPTSplit), bestweight(m_numTopDimensionTPTSplit);
+                    float bestvariance = Variance[index->GetFeatureDim() - 1].Dist;
+                    for (int i = 0; i < m_numTopDimensionTPTSplit; i++)
+                    {
+                        indexs[i] = Variance[index->GetFeatureDim() - 1 - i].VID;
+                        bestweight[i] = 0;
+                    }
+                    bestweight[0] = 1;
+                    float bestmean = Mean[indexs[0]];
+                    // try iIteration random unit-length weightings of the top dimensions and keep the one whose projection has the largest variance
+                    std::vector<float> Val(count);
+                    for (int i = 0; i < iIteration; i++)
+                    {
+                        float sumweight = 0;
+                        for (int j = 0; j < m_numTopDimensionTPTSplit; j++)
+                        {
+                            weight[j] = float(rand() % 10000) / 5000.0f - 1.0f;
+                            sumweight += weight[j] * weight[j];
+                        }
+                        sumweight = sqrt(sumweight);
+                        for (int j = 0; j < m_numTopDimensionTPTSplit; j++)
+                        {
+                            weight[j] /= sumweight;
+                        }
+                        float mean = 0;
+                        for (int j = 0; j < count; j++)
+                        {
+                            Val[j] = 0;
+                            const T* v = (const T*)index->GetSample(indices[first + j]);
+                            for (int k = 0; k < m_numTopDimensionTPTSplit; k++)
+                            {
+                                Val[j] += weight[k] * v[indexs[k]];
+                            }
+                            mean += Val[j];
+                        }
+                        mean /= count;
+                        float var = 0;
+                        for (int j = 0; j < count; j++)
+                        {
+                            float dist = Val[j] - mean;
+                            var += dist * dist;
+                        }
+                        if (var > bestvariance)
+                        {
+                            bestvariance = var;
+                            bestmean = mean;
+                            for (int j = 0; j < m_numTopDimensionTPTSplit; j++)
+                            {
+                                bestweight[j] = weight[j];
+                            }
+                        }
+                    }
+                    int i = first;
+                    int j = last;
+                    // decide which child one point belongs
+                    while (i <= j)
+                    {
+                        float val = 0;
+                        const T* v = (const T*)index->GetSample(indices[i]);
+                        for (int k = 0; k < m_numTopDimensionTPTSplit; k++)
+                        {
+                            val += bestweight[k] * v[indexs[k]];
+                        }
+                        if (val < bestmean)
+                        {
+                            i++;
+                        }
+                        else
+                        {
+                            std::swap(indices[i], indices[j]);
+                            j--;
+                        }
+                    }
+                    // if all the points in the node are equal,equally split the node into 2
+                    if ((i == first) || (i == last + 1))
+                    {
+                        i = (first + last + 1) / 2;
+                    }
+                    // empty the scratch vectors before recursing
+                    Mean.clear();
+                    Variance.clear();
+                    Val.clear();
+                    indexs.clear();
+                    weight.clear();
+                    bestweight.clear();
+
+                    PartitionByTptree<T>(index, indices, first, i - 1, leaves);
+                    PartitionByTptree<T>(index, indices, i, last, leaves);
+                }
+            }
+            // Loads a graph written by SaveGraph; returns false if the file cannot be opened or is truncated/corrupt.
+            bool LoadGraph(std::string sGraphFilename)
+            {
+                std::cout << "Load Graph From " << sGraphFilename << std::endl;
+                FILE * fp = fopen(sGraphFilename.c_str(), "rb");
+                if (fp == NULL) return false;
+                // every read is checked: a short read used to leave the sizes or rows unset and still report success
+                if (fread(&m_iGraphSize, sizeof(int), 1, fp) != 1 || m_iGraphSize < 0) { fclose(fp); return false; }
+                if (fread(&m_iNeighborhoodSize, sizeof(int), 1, fp) != 1 || m_iNeighborhoodSize < 0) { fclose(fp); return false; }
+                m_pNeighborhoodGraph.Initialize(m_iGraphSize, m_iNeighborhoodSize);
+                m_dataUpdateLock.resize(m_iGraphSize);
+
+                for (int i = 0; i < m_iGraphSize; i++)
+                {
+                    if (fread((m_pNeighborhoodGraph)[i], sizeof(int), m_iNeighborhoodSize, fp) != (size_t)m_iNeighborhoodSize) { fclose(fp); return false; }
+                }
+                fclose(fp);
+                std::cout << "Load Graph (" << m_iGraphSize << "," << m_iNeighborhoodSize << ") Finish!" << std::endl;
+                return true;
+            }
+            // Adopts a graph from an in-memory image (row count, row width, then the rows); the buffer is trusted, no bounds are checked.
+            bool SetGraph(char* pGraphMemFile)
+            {
+                m_iGraphSize = *((int*)pGraphMemFile);
+                pGraphMemFile += sizeof(int);
+
+                m_iNeighborhoodSize = *((int*)pGraphMemFile);
+                pGraphMemFile += sizeof(int);
+
+                m_pNeighborhoodGraph.Initialize(m_iGraphSize, m_iNeighborhoodSize, (int*)pGraphMemFile);
+                m_dataUpdateLock.resize(m_iGraphSize);
+                return true;
+            }
+            // Writes row count, row width and the first m_iNeighborhoodSize entries of every row; returns false only when the file cannot be opened.
+            bool SaveGraph(std::string sGraphFilename) const
+            {
+                std::cout << "Save Graph To " << sGraphFilename << std::endl;
+                FILE *fp = fopen(sGraphFilename.c_str(), "wb");
+                if (fp == NULL) return false;
+
+                fwrite(&m_iGraphSize, sizeof(int), 1, fp);
+                fwrite(&m_iNeighborhoodSize, sizeof(int), 1, fp);
+                for (int i = 0; i < m_iGraphSize; i++)
+                {
+                    fwrite((m_pNeighborhoodGraph)[i], sizeof(int), m_iNeighborhoodSize, fp);
+                }
+                fclose(fp);
+                std::cout << "Save Graph (" << m_iGraphSize << "," << m_iNeighborhoodSize << ") Finish!" << std::endl;
+                return true;
+            }
+            // Appends num rows and grows the lock table to match.
+            inline void AddBatch(int num) { m_pNeighborhoodGraph.AddBatch(num); m_iGraphSize += num; m_dataUpdateLock.resize(m_iGraphSize); }
+            // Unchecked access to the neighbor row of one node.
+            inline int* operator[](int index) { return m_pNeighborhoodGraph[index]; }
+
+            inline const int* operator[](int index) const { return m_pNeighborhoodGraph[index]; }
+            // Sets the row count and resizes the lock table to match.
+            inline void SetR(int rows) { m_pNeighborhoodGraph.SetR(rows); m_iGraphSize = rows; m_dataUpdateLock.resize(m_iGraphSize); }
+            // Number of rows (nodes) in the graph.
+            inline int R() const { return m_iGraphSize; }
+            // Factory, defined outside this header.
+            static std::shared_ptr<NeighborhoodGraph> CreateInstance(std::string type);
+
+        protected:
+            // Graph structure
+            int m_iGraphSize;
+            COMMON::Dataset<int> m_pNeighborhoodGraph;
+            COMMON::FineGrainedLock m_dataUpdateLock; // protect one row of the graph
+
+        public:
+            int m_iTPTNumber, m_iTPTLeafSize, m_iSamples, m_numTopDimensionTPTSplit;
+            int m_iNeighborhoodSize, m_iNeighborhoodScale, m_iCEFScale, m_iRefineIter, m_iCEF, m_iMaxCheckForRefineGraph;
+        };
+    }
+}
+#endif
diff --git a/AnnService/inc/Core/Common/QueryResultSet.h b/AnnService/inc/Core/Common/QueryResultSet.h index
33dcf5c7..f410b29d 100644 --- a/AnnService/inc/Core/Common/QueryResultSet.h +++ b/AnnService/inc/Core/Common/QueryResultSet.h @@ -38,7 +38,7 @@ class QueryResultSet : public QueryResult m_target = p_target; } - inline const T* GetTarget() + inline const T* GetTarget() const { return reinterpret_cast(m_target); }
diff --git a/AnnService/inc/Core/Common/RelativeNeighborhoodGraph.h b/AnnService/inc/Core/Common/RelativeNeighborhoodGraph.h
new file mode 100644
index 00000000..83d5ee4a
--- /dev/null
+++ b/AnnService/inc/Core/Common/RelativeNeighborhoodGraph.h
@@ -0,0 +1,120 @@
+#ifndef _SPTAG_COMMON_RNG_H_
+#define _SPTAG_COMMON_RNG_H_
+#include <vector>
+#include "NeighborhoodGraph.h"
+
+namespace SPTAG
+{
+    namespace COMMON
+    {
+        class RelativeNeighborhoodGraph: public NeighborhoodGraph // neighbor rule: keep a candidate only if no already-kept neighbor is at least as close to it as the node is
+        {
+        public:
+            void RebuildNeighbors(VectorIndex* index, const int node, int* nodes, const BasicResult* queryResults, const int numResults) override { // fills nodes from queryResults in order, pads the rest with -1
+                int count = 0;
+                for (int j = 0; j < numResults && count < m_iNeighborhoodSize; j++) {
+                    const BasicResult& item = queryResults[j];
+                    if (item.VID < 0) break;
+                    if (item.VID == node) continue;
+                    // reject the candidate when an accepted neighbor is no farther from it than node is
+                    bool good = true;
+                    for (int k = 0; k < count; k++) {
+                        if (index->ComputeDistance(index->GetSample(nodes[k]), index->GetSample(item.VID)) <= item.Dist) {
+                            good = false;
+                            break;
+                        }
+                    }
+                    if (good) nodes[count++] = item.VID;
+                }
+                for (int j = count; j < m_iNeighborhoodSize; j++) nodes[j] = -1;
+            }
+            // Tries to place insertNode into node's row; an entry it displaces is carried forward and tried against the later slots.
+            void InsertNeighbors(VectorIndex* index, const int node, int insertNode, float insertDist) override
+            {
+                int* nodes = m_pNeighborhoodGraph[node];
+                for (int k = 0; k < m_iNeighborhoodSize; k++)
+                {
+                    int tmpNode = nodes[k];
+                    if (tmpNode < -1) continue;
+                    // first empty slot (-1): take it if the rule allows, then stop either way
+                    if (tmpNode < 0)
+                    {
+                        bool good = true;
+                        for (int t = 0; t < k; t++) {
+                            if (index->ComputeDistance(index->GetSample(insertNode), index->GetSample(nodes[t])) < insertDist) {
+                                good = false;
+                                break;
+                            }
+                        }
+                        if (good) {
+                            nodes[k] = insertNode;
+                        }
+                        break;
+                    }
+                    float tmpDist = index->ComputeDistance(index->GetSample(node), index->GetSample(tmpNode));
+                    if (insertDist < tmpDist || (insertDist == tmpDist && insertNode < tmpNode))
+                    {
+                        bool good = true;
+                        for (int t = 0; t < k; t++) {
+                            if (index->ComputeDistance(index->GetSample(insertNode), index->GetSample(nodes[t])) < insertDist) {
+                                good = false;
+                                break;
+                            }
+                        }
+                        if (good) {
+                            nodes[k] = insertNode;
+                            insertNode = tmpNode;
+                            insertDist = tmpDist;
+                        }
+                        else {
+                            break;
+                        }
+                    }
+                }
+            }
+            // Picks samples random nodes, recomputes each one's row by brute force and returns the fraction of row entries that match the stored graph.
+            float GraphAccuracyEstimation(VectorIndex* index, const int samples, const std::unordered_map<int, int>* idmap = nullptr) override
+            {
+                if (samples <= 0 || m_iNeighborhoodSize <= 0) return 0; // nothing to measure: avoids dividing by zero below
+                std::vector<int> correct(samples, 0); // per-sample hit counts; released automatically
+#pragma omp parallel for schedule(dynamic)
+                for (int i = 0; i < samples; i++)
+                {
+                    int x = COMMON::Utils::rand_int(m_iGraphSize);
+                    //int x = i;
+                    COMMON::QueryResultSet query(nullptr, m_iCEF); // NOTE(review): the template argument was lost in extraction and cannot be recovered from this file - restore it
+                    for (int y = 0; y < m_iGraphSize; y++)
+                    {
+                        if ((idmap != nullptr && idmap->find(y) != idmap->end())) continue;
+                        float dist = index->ComputeDistance(index->GetSample(x), index->GetSample(y));
+                        query.AddPoint(y, dist);
+                    }
+                    query.SortResult();
+                    std::vector<int> exact_rng(m_iNeighborhoodSize); // reference row for x; released automatically
+                    RebuildNeighbors(index, x, exact_rng.data(), query.GetResults(), m_iCEF);
+
+                    correct[i] = 0;
+                    for (int j = 0; j < m_iNeighborhoodSize; j++) {
+                        if (exact_rng[j] == -1) {
+                            correct[i] += m_iNeighborhoodSize - j;
+                            break;
+                        }
+                        for (int k = 0; k < m_iNeighborhoodSize; k++)
+                            if ((m_pNeighborhoodGraph)[x][k] == exact_rng[j]) {
+                                correct[i]++;
+                                break;
+                            }
+                    }
+
+                }
+                float acc = 0;
+                for (int i = 0; i < samples; i++) acc += float(correct[i]);
+                acc = acc / samples / m_iNeighborhoodSize;
+
+                return acc;
+            }
+
+        };
+    }
+}
+#endif
\ No newline at end of file
diff --git a/AnnService/inc/Core/Common/WorkSpace.h b/AnnService/inc/Core/Common/WorkSpace.h index f17ff0bb..f2ce87a0 100644 --- a/AnnService/inc/Core/Common/WorkSpace.h +++ b/AnnService/inc/Core/Common/WorkSpace.h @@ -126,49 +126,6 @@ namespace SPTAG } }; - template - class
CountVector - { - size_t m_bytes; - T* m_data; - T m_count; - T MAX; - - public: - void Init(int size) - { - m_bytes = sizeof(T) * size; - m_data = new T[size]; - m_count = 0; - MAX = ((std::numeric_limits::max)()); - memset(m_data, 0, m_bytes); - } - - CountVector() :m_data(nullptr) {} - CountVector(int size) { Init(size); } - ~CountVector() { if (m_data != nullptr) delete[] m_data; } - - inline void clear() - { - if (m_count == MAX) - { - memset(m_data, 0, m_bytes); - m_count = 1; - } - else - { - m_count++; - } - } - - inline bool CheckAndSet(int idx) - { - if (m_data[idx] == m_count) return true; - m_data[idx] = m_count; - return false; - } - }; - // Variables for each single NN search struct WorkSpace { diff --git a/AnnService/inc/Core/KDT/Index.h b/AnnService/inc/Core/KDT/Index.h index d21e76b0..5dd094e1 100644 --- a/AnnService/inc/Core/KDT/Index.h +++ b/AnnService/inc/Core/KDT/Index.h @@ -1,19 +1,19 @@ #ifndef _SPTAG_KDT_INDEX_H_ #define _SPTAG_KDT_INDEX_H_ -#include "../SearchQuery.h" -#include "../VectorIndex.h" #include "../Common.h" +#include "../VectorIndex.h" #include "../Common/CommonUtils.h" #include "../Common/DistanceUtils.h" #include "../Common/QueryResultSet.h" -#include "../Common/Heap.h" #include "../Common/Dataset.h" #include "../Common/WorkSpace.h" #include "../Common/WorkSpacePool.h" -#include "../Common/FineGrainedLock.h" -#include "../Common/DataUtils.h" +#include "../Common/RelativeNeighborhoodGraph.h" +#include "../Common/KDTree.h" +#include "inc/Helper/StringConvert.h" +#include "inc/Helper/SimpleIniReader.h" #include #include @@ -29,168 +29,79 @@ namespace SPTAG namespace KDT { - // node type for storing KDT - struct KDTNode - { - int left; - int right; - short split_dim; - float split_value; - }; - template class Index : public VectorIndex { private: - // Initial data points - int m_iDataSize; - int m_iDataDimension; + // data points COMMON::Dataset m_pSamples; // KDT structures. 
- int m_iKDTNumber; - std::vector m_pKDTStart; - std::vector m_pKDTRoots; - int m_numTopDimensionKDTSplit; - int m_numSamplesKDTSplitConsideration; + COMMON::KDTree m_pTrees; // Graph structure - int m_iGraphSize; - int m_iNeighborhoodSize; - COMMON::Dataset m_pNeighborhoodGraph; - - // Variables for building TPTs - int m_iTPTNumber; - int m_iTPTLeafSize; - int m_numTopDimensionTPTSplit; - int m_numSamplesTPTSplitConsideration; - - // Variables for building graph - int m_iRefineIter; - int m_iCEF; - int m_iMaxCheckForRefineGraph; - int m_iMaxCheck; - std::unordered_map m_pSampleToCenter; + COMMON::RelativeNeighborhoodGraph m_pGraph; - // Load from files directly std::string m_sKDTFilename; std::string m_sGraphFilename; std::string m_sDataPointsFilename; - // Load from memory mapped files - char* m_pKDTMemoryFile; - char* m_pGraphMemoryFile; - char* m_pDataPointsMemoryFile; - - DistCalcMethod m_iDistCalcMethod; - float(*m_fComputeDistance)(const T* pX, const T* pY, int length); - - int m_iCacheSize; - int m_iDebugLoad; - - int g_iThresholdOfNumberOfContinuousNoBetterPropagation; - int g_iNumberOfInitialDynamicPivots; - int g_iNumberOfOtherDynamicPivots; - - int m_iNumberOfThreads; - std::mutex m_dataAllocLock; - COMMON::FineGrainedLock m_dataUpdateLock; + std::mutex m_dataLock; // protect data and graph tbb::concurrent_unordered_set m_deletedID; std::unique_ptr m_workSpacePool; + + int m_iNumberOfThreads; + DistCalcMethod m_iDistCalcMethod; + float(*m_fComputeDistance)(const T* pX, const T* pY, int length); + + int m_iMaxCheck; + int m_iThresholdOfNumberOfContinuousNoBetterPropagation; + int m_iNumberOfInitialDynamicPivots; + int m_iNumberOfOtherDynamicPivots; public: - Index() : m_iKDTNumber(1), - m_numTopDimensionKDTSplit(5), - m_numSamplesKDTSplitConsideration(100), - m_iNeighborhoodSize(32), - m_iTPTNumber(32), - m_iTPTLeafSize(2000), - m_numTopDimensionTPTSplit(5), - m_numSamplesTPTSplitConsideration(1000), - m_iRefineIter(0), - m_iCEF(1000), - 
m_iMaxCheckForRefineGraph(10000), - m_iMaxCheck(2048), - m_pKDTMemoryFile(NULL), - m_pGraphMemoryFile(NULL), - m_pDataPointsMemoryFile(NULL), + Index() : m_sKDTFilename("tree.bin"), m_sGraphFilename("graph.bin"), m_sDataPointsFilename("vectors.bin"), m_iNumberOfThreads(1), m_iDistCalcMethod(DistCalcMethod::Cosine), m_fComputeDistance(COMMON::DistanceCalcSelector(DistCalcMethod::Cosine)), - m_iCacheSize(-1), - m_iDebugLoad(-1), - g_iThresholdOfNumberOfContinuousNoBetterPropagation(3), - g_iNumberOfInitialDynamicPivots(50), - g_iNumberOfOtherDynamicPivots(4) {} - - ~Index() { - m_pKDTRoots.clear(); - } - int GetNumSamples() const { return m_pSamples.R(); } - int GetFeatureDim() const { return m_pSamples.C(); } - int GetNumThreads() const { return m_iNumberOfThreads; } - int GetCurrMaxCheck() const { return m_iMaxCheck; } - - DistCalcMethod GetDistCalcMethod() const { return m_iDistCalcMethod; } - IndexAlgoType GetIndexAlgoType() const { return IndexAlgoType::KDT; } - VectorValueType GetVectorValueType() const { return GetEnumValueType(); } + m_iMaxCheck(2048), + m_iThresholdOfNumberOfContinuousNoBetterPropagation(3), + m_iNumberOfInitialDynamicPivots(50), + m_iNumberOfOtherDynamicPivots(4) {} + + ~Index() {} + + inline int GetNumSamples() const { return m_pSamples.R(); } + inline int GetFeatureDim() const { return m_pSamples.C(); } + + inline int GetCurrMaxCheck() const { return m_iMaxCheck; } + inline int GetNumThreads() const { return m_iNumberOfThreads; } + inline DistCalcMethod GetDistCalcMethod() const { return m_iDistCalcMethod; } + inline IndexAlgoType GetIndexAlgoType() const { return IndexAlgoType::KDT; } + inline VectorValueType GetVectorValueType() const { return GetEnumValueType(); } + + inline float ComputeDistance(const void* pX, const void* pY) const { return m_fComputeDistance((const T*)pX, (const T*)pY, m_pSamples.C()); } + inline const void* GetSample(const int idx) const { return (void*)m_pSamples[idx]; } ErrorCode BuildIndex(const void* p_data, 
int p_vectorNum, int p_dimension); - ErrorCode LoadIndex(const std::string& p_folderPath); - ErrorCode LoadIndexFromMemory(const std::vector& p_indexBlobs); - ErrorCode SaveIndex(const std::string& p_folderPath); + ErrorCode LoadIndexFromMemory(const std::vector& p_indexBlobs); - void SearchIndex(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space, const tbb::concurrent_unordered_set &p_deleted) const; + ErrorCode SaveIndex(const std::string& p_folderPath, std::ofstream& p_configout); + ErrorCode LoadIndex(const std::string& p_folderPath, Helper::IniReader& p_reader); ErrorCode SearchIndex(QueryResult &p_query) const; - ErrorCode AddIndex(const void* p_vectors, int p_vectorNum, int p_dimension); ErrorCode DeleteIndex(const void* p_vectors, int p_vectorNum); - ErrorCode RefineIndex(const std::string& p_folderPath); - ErrorCode MergeIndex(const char* p_indexFilePath1, const char* p_indexFilePath2); ErrorCode SetParameter(const char* p_param, const char* p_value); std::string GetParameter(const char* p_param) const; private: - // Functions for loading models from files - bool LoadDataPoints(std::string sDataPointsFileName); - bool LoadKDT(std::string sKDTFilename); - bool LoadGraph(std::string sGraphFilename); - - // Functions for loading models from memory mapped files - bool LoadDataPoints(char* pDataPointsMemFile); - bool LoadKDT(char* pKDTMemFile); - bool LoadGraph(char* pGraphMemFile); - - bool SaveDataPoints(std::string sDataPointsFileName); - - // Functions for building kdtree - void BuildKDT(std::vector& indices, std::vector& newStart, std::vector& newRoot); - bool SaveKDT(std::string sKDTFilename, std::vector& newStart, std::vector& newRoot) const; - void DivideTree(KDTNode* pTree, std::vector& indices,int first, int last, - int index, int &iTreeSize); - void ChooseDivision(KDTNode& node, const std::vector& indices, int first, int last); - int SelectDivisionDimension(const std::vector& varianceValues) const; - int Subdivide(const KDTNode& node, 
std::vector& indices, const int first, const int last); - - // Functions for building Graph - void BuildRNG(); - bool SaveRNG(std::string sGraphFilename) const; - void PartitionByTptree(std::vector &indices, - const int first, - const int last, - std::vector> &leaves); - void RefineRNG(); - void RefineRNGNode(const int node, COMMON::WorkSpace &space, bool updateNeighbors); - void RebuildRNGNodeNeighbors(int* nodes, const BasicResult* queryResults, int numResults); - float GraphAccuracyEstimation(int NSample, bool rng); - - // Functions for hybrid search - void KDTSearch(const int node, const bool isInit, const float distBound, - COMMON::WorkSpace& space, COMMON::QueryResultSet &query, const tbb::concurrent_unordered_set &deleted) const; + ErrorCode RefineIndex(const std::string& p_folderPath); + void SearchIndexWithDeleted(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space, const tbb::concurrent_unordered_set &p_deleted) const; + void SearchIndexWithoutDeleted(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space) const; }; } // namespace KDT } // namespace SPTAG diff --git a/AnnService/inc/Core/KDT/ParameterDefinitionList.h b/AnnService/inc/Core/KDT/ParameterDefinitionList.h index 932a525f..8ca2ef74 100644 --- a/AnnService/inc/Core/KDT/ParameterDefinitionList.h +++ b/AnnService/inc/Core/KDT/ParameterDefinitionList.h @@ -5,25 +5,27 @@ DefineKDTParameter(m_sKDTFilename, std::string, std::string("tree.bin"), "TreeFi DefineKDTParameter(m_sGraphFilename, std::string, std::string("graph.bin"), "GraphFilePath") DefineKDTParameter(m_sDataPointsFilename, std::string, std::string("vectors.bin"), "VectorFilePath") -DefineKDTParameter(m_iKDTNumber, int, 1L, "KDTNumber") -DefineKDTParameter(m_numTopDimensionKDTSplit, int, 5L, "NumTopDimensionKDTSplit") -DefineKDTParameter(m_numSamplesKDTSplitConsideration, int, 100L, "NumSamplesKDTSplitConsideration") -DefineKDTParameter(m_iNeighborhoodSize, int, 32L, "NeighborhoodSize") -DefineKDTParameter(m_iTPTNumber, int, 
32L, "TPTNumber") -DefineKDTParameter(m_iTPTLeafSize, int, 2000L, "TPTLeafSize") -DefineKDTParameter(m_numTopDimensionTPTSplit, int, 5L, "NumTopDimensionTPTSplit") -DefineKDTParameter(m_numSamplesTPTSplitConsideration, int, 100L, "NumSamplesTPTSplitConsideration") -DefineKDTParameter(m_iCEF, int, 1000L, "CEF") -DefineKDTParameter(m_iMaxCheckForRefineGraph, int, 10000L, "MaxCheckForRefineGraph") -DefineKDTParameter(m_iMaxCheck, int, 8192L, "MaxCheck") -DefineKDTParameter(m_iNumberOfThreads, int, 1L, "NumberOfThreads") +DefineKDTParameter(m_pTrees.m_iTreeNumber, int, 1L, "KDTNumber") +DefineKDTParameter(m_pTrees.m_numTopDimensionKDTSplit, int, 5L, "NumTopDimensionKDTSplit") +DefineKDTParameter(m_pTrees.m_iSamples, int, 100L, "NumSamplesKDTSplitConsideration") -DefineKDTParameter(g_iThresholdOfNumberOfContinuousNoBetterPropagation, int, 3L, "ThresholdOfNumberOfContinuousNoBetterPropagation") -DefineKDTParameter(g_iNumberOfInitialDynamicPivots, int, 50L, "NumberOfInitialDynamicPivots") -DefineKDTParameter(g_iNumberOfOtherDynamicPivots, int, 4L, "NumberOfOtherDynamicPivots") +DefineKDTParameter(m_pGraph.m_iTPTNumber, int, 32L, "TPTNumber") +DefineKDTParameter(m_pGraph.m_iTPTLeafSize, int, 2000L, "TPTLeafSize") +DefineKDTParameter(m_pGraph.m_numTopDimensionTPTSplit, int, 5L, "NumTopDimensionTPTSplit") +DefineKDTParameter(m_pGraph.m_iNeighborhoodSize, int, 32L, "NeighborhoodSize") +DefineKDTParameter(m_pGraph.m_iNeighborhoodScale, int, 16L, "GraphNeighborhoodScale") +DefineKDTParameter(m_pGraph.m_iCEFScale, int, 4L, "GraphCEFScale") +DefineKDTParameter(m_pGraph.m_iRefineIter, int, 0L, "RefineIterations") +DefineKDTParameter(m_pGraph.m_iCEF, int, 1000L, "CEF") +DefineKDTParameter(m_pGraph.m_iMaxCheckForRefineGraph, int, 10000L, "MaxCheckForRefineGraph") + +DefineKDTParameter(m_iNumberOfThreads, int, 1L, "NumberOfThreads") DefineKDTParameter(m_iDistCalcMethod, SPTAG::DistCalcMethod, SPTAG::DistCalcMethod::Cosine, "DistCalcMethod") -DefineKDTParameter(m_iRefineIter, int, 0L, 
"RefineIterations") -DefineKDTParameter(m_iDebugLoad, int, -1, "NumTrains") -DefineKDTParameter(m_iCacheSize, int, -1, "CacheSize") + +DefineKDTParameter(m_iMaxCheck, int, 8192L, "MaxCheck") +DefineKDTParameter(m_iThresholdOfNumberOfContinuousNoBetterPropagation, int, 3L, "ThresholdOfNumberOfContinuousNoBetterPropagation") +DefineKDTParameter(m_iNumberOfInitialDynamicPivots, int, 50L, "NumberOfInitialDynamicPivots") +DefineKDTParameter(m_iNumberOfOtherDynamicPivots, int, 4L, "NumberOfOtherDynamicPivots") + #endif diff --git a/AnnService/inc/Core/MetadataSet.h b/AnnService/inc/Core/MetadataSet.h index e9794893..f476531e 100644 --- a/AnnService/inc/Core/MetadataSet.h +++ b/AnnService/inc/Core/MetadataSet.h @@ -26,6 +26,8 @@ class MetadataSet virtual ErrorCode SaveMetadata(const std::string& p_metaFile, const std::string& p_metaindexFile) = 0; + virtual ErrorCode RefineMetadata(std::vector& indices, const std::string& p_folderPath); + static ErrorCode MetaCopy(const std::string& p_src, const std::string& p_dst); }; @@ -52,7 +54,7 @@ class FileMetadataSet : public MetadataSet std::vector m_pOffsets; - int m_count; + SizeType m_count; std::string m_metaFile; diff --git a/AnnService/inc/Core/VectorIndex.h b/AnnService/inc/Core/VectorIndex.h index 6f648d36..cbe1b579 100644 --- a/AnnService/inc/Core/VectorIndex.h +++ b/AnnService/inc/Core/VectorIndex.h @@ -5,6 +5,7 @@ #include "SearchQuery.h" #include "VectorSet.h" #include "MetadataSet.h" +#include "inc/Helper/SimpleIniReader.h" namespace SPTAG { @@ -16,9 +17,9 @@ class VectorIndex virtual ~VectorIndex(); - virtual ErrorCode SaveIndex(const std::string& p_folderPath) = 0; + virtual ErrorCode SaveIndex(const std::string& p_folderPath, std::ofstream& p_configout) = 0; - virtual ErrorCode LoadIndex(const std::string& p_folderPath) = 0; + virtual ErrorCode LoadIndex(const std::string& p_folderPath, Helper::IniReader& p_reader) = 0; virtual ErrorCode LoadIndexFromMemory(const std::vector& p_indexBlobs) = 0; @@ -30,14 +31,12 @@ 
class VectorIndex virtual ErrorCode DeleteIndex(const void* p_vectors, int p_vectorNum) = 0; - virtual ErrorCode RefineIndex(const std::string& p_folderPath) = 0; - - virtual ErrorCode MergeIndex(const char* p_indexFilePath1, const char* p_indexFilePath2) = 0; - //virtual ErrorCode AddIndexWithID(const void* p_vector, const int& p_id) = 0; //virtual ErrorCode DeleteIndexWithID(const void* p_vector, const int& p_id) = 0; - + + virtual float ComputeDistance(const void* pX, const void* pY) const = 0; + virtual const void* GetSample(const int idx) const = 0; virtual int GetFeatureDim() const = 0; virtual int GetNumSamples() const = 0; @@ -49,6 +48,10 @@ class VectorIndex virtual std::string GetParameter(const char* p_param) const = 0; virtual ErrorCode SetParameter(const char* p_param, const char* p_value) = 0; + virtual ErrorCode LoadIndex(const std::string& p_folderPath); + + virtual ErrorCode SaveIndex(const std::string& p_folderPath); + virtual ErrorCode BuildIndex(std::shared_ptr p_vectorSet, std::shared_ptr p_metadataSet); virtual ErrorCode SearchIndex(const void* p_vector, int p_neighborCount, std::vector& p_results) const; @@ -61,16 +64,22 @@ class VectorIndex virtual ByteArray GetMetadata(IndexType p_vectorID) const; virtual void SetMetadata(const std::string& p_metadataFilePath, const std::string& p_metadataIndexPath); - void SetIndexName(const std::string& p_indexName); - - const std::string& GetIndexName() const; + virtual std::string GetIndexName() const + { + if (m_sIndexName == "") + return Helper::Convert::ConvertToString(GetIndexAlgoType()); + return m_sIndexName; + } + virtual void SetIndexName(std::string p_name) { m_sIndexName = p_name; } static std::shared_ptr CreateInstance(IndexAlgoType p_algo, VectorValueType p_valuetype); + static ErrorCode MergeIndex(const char* p_indexFilePath1, const char* p_indexFilePath2); + static ErrorCode LoadIndex(const std::string& p_loaderFilePath, std::shared_ptr& p_vectorIndex); protected: - std::string 
m_indexName; + std::string m_sIndexName; std::shared_ptr m_pMetadata; }; diff --git a/AnnService/src/Core/BKT/BKTIndex.cpp b/AnnService/src/Core/BKT/BKTIndex.cpp index 8f9a1862..c6f3d466 100644 --- a/AnnService/src/Core/BKT/BKTIndex.cpp +++ b/AnnService/src/Core/BKT/BKTIndex.cpp @@ -1,9 +1,4 @@ #include "inc/Core/BKT/Index.h" -#include "inc/Core/Common/WorkSpacePool.h" -#include "inc/Core/MetadataSet.h" -#include "inc/Helper/StringConvert.h" -#include "inc/Helper/CommonHelper.h" -#include "inc/Helper/SimpleIniReader.h" #pragma warning(disable:4996) // 'fopen': This function or variable may be unsafe. Consider using fopen_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. #pragma warning(disable:4242) // '=' : conversion from 'int' to 'short', possible loss of data @@ -14,307 +9,93 @@ namespace SPTAG { namespace BKT { -#pragma region Load data points, kd-tree, neighborhood graph template ErrorCode Index::LoadIndexFromMemory(const std::vector& p_indexBlobs) { - if (!LoadDataPoints((char*)p_indexBlobs[0])) return ErrorCode::FailedParseValue; - if (!LoadBKT((char*)p_indexBlobs[1])) return ErrorCode::FailedParseValue; - if (!LoadGraph((char*)p_indexBlobs[2])) return ErrorCode::FailedParseValue; + if (!m_pSamples.Load((char*)p_indexBlobs[0])) return ErrorCode::FailedParseValue; + if (!m_pTrees.LoadTrees((char*)p_indexBlobs[1])) return ErrorCode::FailedParseValue; + if (!m_pGraph.LoadGraph((char*)p_indexBlobs[2])) return ErrorCode::FailedParseValue; return ErrorCode::Success; } template - ErrorCode Index::LoadIndex(const std::string& p_folderPath) + ErrorCode Index::LoadIndex(const std::string& p_folderPath, Helper::IniReader& p_reader) { - std::string folderPath(p_folderPath); - if (!folderPath.empty() && *(folderPath.rbegin()) != FolderSep) - { - folderPath += FolderSep; - } - - Helper::IniReader p_configReader; - if (ErrorCode::Success != p_configReader.LoadIniFile(folderPath + "/indexloader.ini")) - { - return 
ErrorCode::FailedOpenFile; - } - - std::string metadataSection("MetaData"); - if (p_configReader.DoesSectionExist(metadataSection)) - { - std::string metadataFilePath = p_configReader.GetParameter(metadataSection, - "MetaDataFilePath", - std::string()); - std::string metadataIndexFilePath = p_configReader.GetParameter(metadataSection, - "MetaDataIndexPath", - std::string()); - - m_pMetadata.reset(new FileMetadataSet(folderPath + metadataFilePath, folderPath + metadataIndexFilePath)); - - if (!m_pMetadata->Available()) - { - std::cerr << "Error: Failed to load metadata." << std::endl; - return ErrorCode::Fail; - } - } - #define DefineBKTParameter(VarName, VarType, DefaultValue, RepresentStr) \ SetParameter(RepresentStr, \ - p_configReader.GetParameter("Index", \ - RepresentStr, \ - std::string(#DefaultValue)).c_str()); \ + p_reader.GetParameter("Index", \ + RepresentStr, \ + std::string(#DefaultValue)).c_str()); \ #include "inc/Core/BKT/ParameterDefinitionList.h" #undef DefineBKTParameter - if (DistCalcMethod::Undefined == m_iDistCalcMethod) - { - return ErrorCode::Fail; - } - - if (!LoadDataPoints(folderPath + m_sDataPointsFilename)) return ErrorCode::Fail; - if (!LoadBKT(folderPath + m_sBKTFilename)) return ErrorCode::Fail; - if (!LoadGraph(folderPath + m_sGraphFilename)) return ErrorCode::Fail; - - m_iDataSize = m_pSamples.R(); - m_iDataDimension = m_pSamples.C(); - m_dataUpdateLock.resize(m_iDataSize); + if (!m_pSamples.Load(p_folderPath + m_sDataPointsFilename)) return ErrorCode::Fail; + if (!m_pTrees.LoadTrees(p_folderPath + m_sBKTFilename)) return ErrorCode::Fail; + if (!m_pGraph.LoadGraph(p_folderPath + m_sGraphFilename)) return ErrorCode::Fail; m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples())); m_workSpacePool->Init(m_iNumberOfThreads); return ErrorCode::Success; } - template - bool Index::LoadDataPoints(std::string sDataPointsFileName) - { - std::cout << "Load Data Points From " << sDataPointsFileName << std::endl; - FILE * fp 
= fopen(sDataPointsFileName.c_str(), "rb"); - if (fp == NULL) return false; - - int R, C; - fread(&R, sizeof(int), 1, fp); - fread(&C, sizeof(int), 1, fp); - - if (m_iDebugLoad > 0 && R > m_iDebugLoad) R = m_iDebugLoad; - - m_pSamples.Initialize(R, C); - int i = 0, batch = 10000; - while (i + batch < R) { - fread((m_pSamples)[i], sizeof(T), C * batch, fp); - i += batch; - } - fread((m_pSamples)[i], sizeof(T), C * (R - i), fp); - fclose(fp); - std::cout << "Load Data Points (" << m_pSamples.R() << ", " << m_pSamples.C() << ") Finish!" << std::endl; - return true; - } - - // Functions for loading models from memory mapped files - template - bool Index::LoadDataPoints(char* pDataPointsMemFile) - { - int R, C; - R = *((int*)pDataPointsMemFile); - pDataPointsMemFile += sizeof(int); - - C = *((int*)pDataPointsMemFile); - pDataPointsMemFile += sizeof(int); - - m_pSamples.Initialize(R, C, (T*)pDataPointsMemFile); - - return true; - } - - template - bool Index::LoadBKT(std::string sBKTFilename) - { - std::cout << "Load BKT From " << sBKTFilename << std::endl; - FILE *fp = fopen(sBKTFilename.c_str(), "rb"); - if (fp == NULL) return false; - int realBKTNumber; - fread(&realBKTNumber, sizeof(int), 1, fp); - m_pBKTStart.resize(realBKTNumber); - fread(m_pBKTStart.data(), sizeof(int), realBKTNumber, fp); - if (realBKTNumber < m_iBKTNumber) m_iBKTNumber = realBKTNumber; - int treeNodeSize; - fread(&treeNodeSize, sizeof(int), 1, fp); - m_pBKTRoots.resize(treeNodeSize); - for (int i = 0; i < treeNodeSize; i++) { - fread(&(m_pBKTRoots[i].centerid), sizeof(int), 1, fp); - fread(&(m_pBKTRoots[i].childStart), sizeof(int), 1, fp); - fread(&(m_pBKTRoots[i].childEnd), sizeof(int), 1, fp); - } - fclose(fp); - std::cout << "Load BKT (" << m_iBKTNumber << ", " << treeNodeSize << ") Finish!" 
<< std::endl; - return true; - } - - template - bool Index::LoadBKT(char* pBKTMemFile) - { - int realBKTNumber = *((int*)pBKTMemFile); - pBKTMemFile += sizeof(int); - m_pBKTStart.resize(realBKTNumber); - memcpy(m_pBKTStart.data(), pBKTMemFile, sizeof(int)*realBKTNumber); - pBKTMemFile += sizeof(int)*realBKTNumber; - if (realBKTNumber < m_iBKTNumber) m_iBKTNumber = realBKTNumber; +#pragma region K-NN search - int treeNodeSize = *((int*)pBKTMemFile); - pBKTMemFile += sizeof(int); - m_pBKTRoots.resize(treeNodeSize); - for (int i = 0; i < treeNodeSize; i++) { - m_pBKTRoots[i].centerid = *((int*)pBKTMemFile); - pBKTMemFile += sizeof(int); - m_pBKTRoots[i].childStart = *((int*)pBKTMemFile); - pBKTMemFile += sizeof(int); - m_pBKTRoots[i].childEnd = *((int*)pBKTMemFile); - pBKTMemFile += sizeof(int); - } - return true; - } +#define Search(CheckDeleted1) \ + m_pTrees.InitSearchTrees(this, p_query, p_space); \ + const int checkPos = m_pGraph.m_iNeighborhoodSize - 1; \ + while (!p_space.m_SPTQueue.empty()) { \ + m_pTrees.SearchTrees(this, p_query, p_space, m_iNumberOfOtherDynamicPivots + p_space.m_iNumberOfCheckedLeaves); \ + while (!p_space.m_NGQueue.empty()) { \ + COMMON::HeapCell gnode = p_space.m_NGQueue.pop(); \ + const int *node = m_pGraph[gnode.node]; \ + _mm_prefetch((const char *)node, _MM_HINT_T0); \ + CheckDeleted1 { \ + if (p_query.AddPoint(gnode.node, gnode.distance)) { \ + p_space.m_iNumOfContinuousNoBetterPropagation = 0; \ + int checkNode = node[checkPos]; \ + if (checkNode < -1) { \ + const COMMON::BKTNode& tnode = m_pTrees[-2 - checkNode]; \ + for (int i = -tnode.childStart; i < tnode.childEnd; i++) { \ + if (!p_query.AddPoint(m_pTrees[i].centerid, gnode.distance)) break; \ + } \ + } \ + } \ + else { \ + p_space.m_iNumOfContinuousNoBetterPropagation++; \ + if (p_space.m_iNumOfContinuousNoBetterPropagation > p_space.m_iContinuousLimit || p_space.m_iNumberOfCheckedLeaves > p_space.m_iMaxCheck) { \ + p_query.SortResult(); return; \ + } \ + } \ + } \ + for (int 
i = 0; i <= checkPos; i++) { \ + _mm_prefetch((const char *)(m_pSamples)[node[i]], _MM_HINT_T0); \ + } \ + for (int i = 0; i <= checkPos; i++) { \ + int nn_index = node[i]; \ + if (nn_index < 0) break; \ + if (p_space.CheckAndSet(nn_index)) continue; \ + float distance2leaf = m_fComputeDistance(p_query.GetTarget(), (m_pSamples)[nn_index], GetFeatureDim()); \ + p_space.m_iNumberOfCheckedLeaves++; \ + p_space.m_NGQueue.insert(COMMON::HeapCell(nn_index, distance2leaf)); \ + } \ + if (p_space.m_NGQueue.Top().distance > p_space.m_SPTQueue.Top().distance) { \ + break; \ + } \ + } \ + } \ + p_query.SortResult(); \ template - bool Index::LoadGraph(std::string sGraphFilename) + void Index::SearchIndexWithDeleted(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space, const tbb::concurrent_unordered_set &p_deleted) const { - std::cout << "Load Graph From " << sGraphFilename << std::endl; - FILE * fp = fopen(sGraphFilename.c_str(), "rb"); - if (fp == NULL) return false; - fread(&m_iGraphSize, sizeof(int), 1, fp); - int KNNinGraph; - fread(&KNNinGraph, sizeof(int), 1, fp); - if (KNNinGraph < m_iNeighborhoodSize) m_iNeighborhoodSize = KNNinGraph; - - m_pNeighborhoodGraph.Initialize(m_iGraphSize, m_iNeighborhoodSize); - - std::vector unusedData(KNNinGraph); - for (int i = 0; i < m_iGraphSize; i++) - { - fread((m_pNeighborhoodGraph)[i], sizeof(int), m_iNeighborhoodSize, fp); - if (m_iNeighborhoodSize < KNNinGraph) - { - fread(&unusedData[0], sizeof(int), KNNinGraph - m_iNeighborhoodSize, fp); - } - } - fclose(fp); - std::cout << "Load Graph (" << m_iGraphSize << "," << m_iNeighborhoodSize << ") Finish!" 
<< std::endl; - return true; + Search(if (p_deleted.find(gnode.node) == p_deleted.end())) } template - bool Index::LoadGraph(char* pGraphMemFile) { - m_iGraphSize = *((int*)pGraphMemFile); - pGraphMemFile += sizeof(int); - - int KNNinGraph = *((int*)pGraphMemFile); - pGraphMemFile += sizeof(int); - - // In the memory mapped file mode, we'll not accept NeighborhoodSize in graph file that's larger than expected size (m_iNeighborhoodSize) - // as we don't want to make another copy to fit. - if (KNNinGraph > m_iNeighborhoodSize) return false; - - if (KNNinGraph < m_iNeighborhoodSize) m_iNeighborhoodSize = KNNinGraph; - - m_pNeighborhoodGraph.Initialize(m_iGraphSize, m_iNeighborhoodSize, (int*)pGraphMemFile); - - return true; - } -#pragma endregion - -#pragma region K-NN search - - template - void Index::SearchIndex(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space, const tbb::concurrent_unordered_set &p_deleted) const + void Index::SearchIndexWithoutDeleted(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space) const { - for (char i = 0; i < m_iBKTNumber; i++) { - const BKTNode& node = m_pBKTRoots[m_pBKTStart[i]]; - if (node.childStart < 0) { - p_space.m_SPTQueue.insert(COMMON::HeapCell(m_pBKTStart[i], m_fComputeDistance(p_query.GetTarget(), (m_pSamples)[node.centerid], m_iDataDimension))); - } - else { - for (int begin = node.childStart; begin < node.childEnd; begin++) { - int index = m_pBKTRoots[begin].centerid; - p_space.m_SPTQueue.insert(COMMON::HeapCell(begin, m_fComputeDistance(p_query.GetTarget(), (m_pSamples)[index], m_iDataDimension))); - } - } - } - int checkLimit = g_iNumberOfInitialDynamicPivots; - const int checkPos = m_iNeighborhoodSize - 1; - while (!p_space.m_SPTQueue.empty()) { - do - { - COMMON::HeapCell bcell = p_space.m_SPTQueue.pop(); - const BKTNode& tnode = m_pBKTRoots[bcell.node]; - - if (tnode.childStart < 0) { - if (!p_space.CheckAndSet(tnode.centerid)) { - p_space.m_iNumberOfCheckedLeaves++; - 
p_space.m_NGQueue.insert(COMMON::HeapCell(tnode.centerid, bcell.distance)); - } - if (p_space.m_iNumberOfCheckedLeaves >= checkLimit) break; - } - else { - if (!p_space.CheckAndSet(tnode.centerid)) { - p_space.m_NGQueue.insert(COMMON::HeapCell(tnode.centerid, bcell.distance)); - } - for (int begin = tnode.childStart; begin < tnode.childEnd; begin++) { - int index = m_pBKTRoots[begin].centerid; - p_space.m_SPTQueue.insert(COMMON::HeapCell(begin, m_fComputeDistance(p_query.GetTarget(), (m_pSamples)[index], m_iDataDimension))); - } - } - } while (!p_space.m_SPTQueue.empty()); - while (!p_space.m_NGQueue.empty()) { - COMMON::HeapCell gnode = p_space.m_NGQueue.pop(); - const int *node = (m_pNeighborhoodGraph)[gnode.node]; - _mm_prefetch((const char *)node, _MM_HINT_T0); - if (p_deleted.find(gnode.node) == p_deleted.end()) { - if (p_query.AddPoint(gnode.node, gnode.distance)) { - p_space.m_iNumOfContinuousNoBetterPropagation = 0; - - int checkNode = node[checkPos]; - if (checkNode < -1) { - const BKTNode& tnode = m_pBKTRoots[-2 - checkNode]; - for (int i = -tnode.childStart; i < tnode.childEnd; i++) { - if (p_deleted.find(m_pBKTRoots[i].centerid) == p_deleted.end()) { - if (!p_query.AddPoint(m_pBKTRoots[i].centerid, gnode.distance)) break; - } - } - } - } - else { - p_space.m_iNumOfContinuousNoBetterPropagation++; - if (p_space.m_iNumOfContinuousNoBetterPropagation > p_space.m_iContinuousLimit || p_space.m_iNumberOfCheckedLeaves > p_space.m_iMaxCheck) { - p_query.SortResult(); return; - } - } - } - -#ifdef PREFETCH - for (int i = 0; i <= checkPos; i++) { - _mm_prefetch((const char *)(m_pSamples)[node[i]], _MM_HINT_T0); - } -#endif - - for (int i = 0; i <= checkPos; i++) - { - int nn_index = node[i]; - - // do not check it if it has been checked - if (nn_index < 0) break; - if (p_space.CheckAndSet(nn_index)) continue; - - // count the number of the computed nodes - float distance2leaf = m_fComputeDistance(p_query.GetTarget(), (m_pSamples)[nn_index], m_iDataDimension); - 
p_space.m_iNumberOfCheckedLeaves++; - p_space.m_NGQueue.insert(COMMON::HeapCell(nn_index, distance2leaf)); - } - if (p_space.m_NGQueue.Top().distance > p_space.m_SPTQueue.Top().distance) { - checkLimit = g_iNumberOfOtherDynamicPivots + p_space.m_iNumberOfCheckedLeaves; - break; - } - } - } - p_query.SortResult(); + Search(;) } template @@ -324,633 +105,48 @@ namespace SPTAG auto workSpace = m_workSpacePool->Rent(); workSpace->Reset(m_iMaxCheck); - SearchIndex(*((COMMON::QueryResultSet*)&p_query), *workSpace, m_deletedID); + if (m_deletedID.size() > 0) + SearchIndexWithDeleted(*((COMMON::QueryResultSet*)&p_query), *workSpace, m_deletedID); + else + SearchIndexWithoutDeleted(*((COMMON::QueryResultSet*)&p_query), *workSpace); + m_workSpacePool->Return(workSpace); if (p_query.WithMeta() && nullptr != m_pMetadata) { for (int i = 0; i < p_query.GetResultNum(); ++i) { - for (int i = 0; i < p_query.GetResultNum(); ++i) - { - int result = p_query.GetResult(i)->VID; - p_query.SetMetadata(i, (result < 0) ? ByteArray::c_empty : m_pMetadata->GetMetadata(result)); - } + int result = p_query.GetResult(i)->VID; + p_query.SetMetadata(i, (result < 0) ? 
ByteArray::c_empty : m_pMetadata->GetMetadata(result)); } } - return ErrorCode::Success; } #pragma endregion -#pragma region Build/Save kd-tree & neighborhood graphs template ErrorCode Index::BuildIndex(const void* p_data, int p_vectorNum, int p_dimension) { - m_pSamples.Initialize(p_vectorNum, p_dimension); - std::memcpy(m_pSamples.GetData(), p_data, p_vectorNum * p_dimension * sizeof(T)); - m_iDataSize = m_pSamples.R(); - m_iDataDimension = m_pSamples.C(); - m_dataUpdateLock.resize(m_iDataSize); + omp_set_num_threads(m_iNumberOfThreads); + + m_pSamples.Initialize(p_vectorNum, p_dimension, (T*)p_data, false); if (DistCalcMethod::Cosine == m_iDistCalcMethod) { int base = COMMON::Utils::GetBase(); - for (int i = 0; i < m_iDataSize; i++) { - COMMON::Utils::Normalize(m_pSamples[i], m_iDataDimension, base); +#pragma omp parallel for + for (int i = 0; i < GetNumSamples(); i++) { + COMMON::Utils::Normalize(m_pSamples[i], GetFeatureDim(), base); } } - std::vector indices(m_iDataSize); - for (int i = 0; i < m_iDataSize; i++) indices[i] = i; - BuildBKT(indices, m_pBKTStart, m_pBKTRoots); - BuildRNG(); m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples())); m_workSpacePool->Init(m_iNumberOfThreads); - return ErrorCode::Success; - } - -#pragma region Build/Save kd-tree - template - bool Index::SaveBKT(std::string sBKTFilename, std::vector& newStart, std::vector& newRoot) const - { - std::cout << "Save BKT to " << sBKTFilename << std::endl; - FILE *fp = fopen(sBKTFilename.c_str(), "wb"); - if(fp == NULL) return false; - fwrite(&m_iBKTNumber, sizeof(int), 1, fp); - fwrite(newStart.data(), sizeof(int), m_iBKTNumber, fp); - int treeNodeSize = (int)newRoot.size(); - fwrite(&treeNodeSize, sizeof(int), 1, fp); - for (int i = 0; i < treeNodeSize; i++) { - fwrite(&(newRoot[i].centerid), sizeof(int), 1, fp); - fwrite(&(newRoot[i].childStart), sizeof(int), 1, fp); - fwrite(&(newRoot[i].childEnd), sizeof(int), 1, fp); - } - fclose(fp); - std::cout << "Save BKT 
Finish!" << std::endl; - return true; - } - - template - void Index::BuildBKT(std::vector& indices, std::vector& newStart, std::vector& newRoot) - { - omp_set_num_threads(m_iNumberOfThreads); - struct BKTStackItem { - int index, first, last; - BKTStackItem(int index_, int first_, int last_) : index(index_), first(first_), last(last_) {} - }; - std::stack ss; - - KmeansArgs args(m_iBKTKmeansK, m_iDataDimension, (int)indices.size(), m_iNumberOfThreads); - m_pSampleToCenter.clear(); - - for (char i = 0; i < m_iBKTNumber; i++) - { - std::random_shuffle(indices.begin(), indices.end()); - - newStart.push_back((int)newRoot.size()); - newRoot.push_back(BKTNode((int)indices.size())); - std::cout << "Start to build tree " << i + 1 << std::endl; - - ss.push(BKTStackItem(newStart[i], 0, (int)indices.size())); - while (!ss.empty()) { - BKTStackItem item = ss.top(); ss.pop(); - int newBKTid = (int)newRoot.size(); - newRoot[item.index].childStart = newBKTid; - if (item.last - item.first <= m_iBKTLeafSize) { - for (int j = item.first; j < item.last; j++) { - newRoot.push_back(BKTNode(indices[j])); - } - } - else { // clustering the data into BKTKmeansK clusters - int numClusters = KmeansClustering(indices, item.first, item.last, args); - if (numClusters <= 1) { - int end = min(item.last + 1, (int)indices.size()); - std::sort(indices.begin() + item.first, indices.begin() + end); - newRoot[item.index].centerid = indices[item.first]; - newRoot[item.index].childStart = -newRoot[item.index].childStart; - for (int j = item.first + 1; j < end; j++) { - newRoot.push_back(BKTNode(indices[j])); - m_pSampleToCenter[indices[j]] = newRoot[item.index].centerid; - } - m_pSampleToCenter[-1 - newRoot[item.index].centerid] = item.index; - } - else { - for (int k = 0; k < m_iBKTKmeansK; k++) { - if (args.counts[k] == 0) continue; - newRoot.push_back(BKTNode(indices[item.first + args.counts[k] - 1])); - if (args.counts[k] > 1) ss.push(BKTStackItem(newBKTid++, item.first, item.first + args.counts[k] - 
1)); - item.first += args.counts[k]; - } - } - } - newRoot[item.index].childEnd = (int)newRoot.size(); - } - std::cout << i + 1 << " trees built, " << newRoot.size() - newStart[i] << " " << indices.size() << std::endl; - } - } - - template - float Index::KmeansAssign(std::vector& indices, const int first, const int last, KmeansArgs& args, bool updateCenters) { - float currDist = 0; - float lambda = (updateCenters) ? COMMON::Utils::GetBase() * COMMON::Utils::GetBase() / (100.0 * (last - first)) : 0; - int subsize = (last - first - 1) / m_iNumberOfThreads + 1; - -#pragma omp parallel for - for (int tid = 0; tid < m_iNumberOfThreads; tid++) - { - int istart = first + tid * subsize; - int iend = min(first + (tid + 1) * subsize, last); - int *inewCounts = args.newCounts + tid * m_iBKTKmeansK; - float *inewCenters = args.newCenters + tid * m_iBKTKmeansK * m_iDataDimension; - int * iclusterIdx = args.clusterIdx + tid * m_iBKTKmeansK; - float * iclusterDist = args.clusterDist + tid * m_iBKTKmeansK; - float idist = 0; - for (int i = istart; i < iend; i++) { - int clusterid = 0; - float smallestDist = MaxDist; - for (int k = 0; k < m_iBKTKmeansK; k++) { - float dist = m_fComputeDistance(m_pSamples[indices[i]], args.centers + k*m_iDataDimension, m_iDataDimension) + lambda*args.counts[k]; - if (dist > -MaxDist && dist < smallestDist) { - clusterid = k; smallestDist = dist; - } - } - args.label[i] = clusterid; - inewCounts[clusterid]++; - idist += smallestDist; - if (updateCenters) { - for (int j = 0; j < m_iDataDimension; j++) inewCenters[clusterid*m_iDataDimension + j] += m_pSamples[indices[i]][j]; - if (smallestDist > iclusterDist[clusterid]) { - iclusterDist[clusterid] = smallestDist; - iclusterIdx[clusterid] = indices[i]; - } - } - else { - if (smallestDist <= iclusterDist[clusterid]) { - iclusterDist[clusterid] = smallestDist; - iclusterIdx[clusterid] = indices[i]; - } - } - } - COMMON::Utils::atomic_float_add(&currDist, idist); - } - - for (int i = 1; i < 
m_iNumberOfThreads; i++) { - for (int k = 0; k < m_iBKTKmeansK; k++) - args.newCounts[k] += args.newCounts[i*m_iBKTKmeansK + k]; - } - - if (updateCenters) { - for (int i = 1; i < m_iNumberOfThreads; i++) { - float* currCenter = args.newCenters + i*m_iBKTKmeansK*m_iDataDimension; - for (int j = 0; j < m_iBKTKmeansK * m_iDataDimension; j++) args.newCenters[j] += currCenter[j]; - } - - int maxcluster = 0; - for (int k = 1; k < m_iBKTKmeansK; k++) if (args.newCounts[maxcluster] < args.newCounts[k]) maxcluster = k; - - int maxid = maxcluster; - for (int tid = 1; tid < m_iNumberOfThreads; tid++) { - if (args.clusterDist[maxid] < args.clusterDist[tid * m_iBKTKmeansK + maxcluster]) maxid = tid * m_iBKTKmeansK + maxcluster; - } - if (args.clusterIdx[maxid] < 0 || args.clusterIdx[maxid] >= m_iDataSize) - std::cout << "first:" << first << " last:" << last << " maxcluster:" << maxcluster << "(" << args.newCounts[maxcluster] << ") Error maxid:" << maxid << " dist:" << args.clusterDist[maxid] << std::endl; - maxid = args.clusterIdx[maxid]; - - for (int k = 0; k < m_iBKTKmeansK; k++) { - T* TCenter = args.newTCenters + k * m_iDataDimension; - if (args.newCounts[k] == 0) { - //int nextid = Utils::rand_int(last, first); - //while (args.label[nextid] != maxcluster) nextid = Utils::rand_int(last, first); - int nextid = maxid; - std::memcpy(TCenter, m_pSamples[nextid], sizeof(T)*m_iDataDimension); - } - else { - float* currCenters = args.newCenters + k * m_iDataDimension; - for (int j = 0; j < m_iDataDimension; j++) currCenters[j] /= args.newCounts[k]; - - if (m_iDistCalcMethod == DistCalcMethod::Cosine) { - COMMON::Utils::Normalize(currCenters, m_iDataDimension, COMMON::Utils::GetBase()); - } - for (int j = 0; j < m_iDataDimension; j++) TCenter[j] = (T)(currCenters[j]); - } - } - } - else { - for (int i = 1; i < m_iNumberOfThreads; i++) { - for (int k = 0; k < m_iBKTKmeansK; k++) { - if (args.clusterIdx[i*m_iBKTKmeansK + k] != -1 && args.clusterDist[i*m_iBKTKmeansK + k] <= 
args.clusterDist[k]) { - args.clusterDist[k] = args.clusterDist[i*m_iBKTKmeansK + k]; - args.clusterIdx[k] = args.clusterIdx[i*m_iBKTKmeansK + k]; - } - } - } - } - return currDist; - } - - template - int Index::KmeansClustering(std::vector& indices, const int first, const int last, KmeansArgs& args) { - int iterLimit = 100; - - int batchEnd = min(first + m_iSamples, last); - float currDiff, currDist, minClusterDist = MaxDist; - for (int numKmeans = 0; numKmeans < 3; numKmeans++) { - for (int k = 0; k < m_iBKTKmeansK; k++) { - int randid = COMMON::Utils::rand_int(last, first); - memcpy(args.centers + k*m_iDataDimension, m_pSamples[indices[randid]], sizeof(T)*m_iDataDimension); - } - args.ClearCounts(); - currDist = KmeansAssign(indices, first, batchEnd, args, false); - if (currDist < minClusterDist) { - minClusterDist = currDist; - memcpy(args.newTCenters, args.centers, sizeof(T)*m_iBKTKmeansK*m_iDataDimension); - memcpy(args.counts, args.newCounts, sizeof(int) * m_iBKTKmeansK); - } - } - - minClusterDist = MaxDist; - int noImprovement = 0; - for (int iter = 0; iter < iterLimit; iter++) { - std::memcpy(args.centers, args.newTCenters, sizeof(T)*m_iBKTKmeansK*m_iDataDimension); - std::random_shuffle(indices.begin() + first, indices.begin() + last); - - args.ClearCenters(); - args.ClearCounts(); - args.ClearDists(-MaxDist); - currDist = KmeansAssign(indices, first, batchEnd, args, true); - memcpy(args.counts, args.newCounts, sizeof(int)*m_iBKTKmeansK); + + m_pTrees.BuildTrees(this); + m_pGraph.BuildGraph(this, &(m_pTrees.GetSampleMap())); - currDiff = 0; - for (int k = 0; k < m_iBKTKmeansK; k++) { - currDiff += m_fComputeDistance(args.centers + k*m_iDataDimension, args.newTCenters + k*m_iDataDimension, m_iDataDimension); - } - - if (currDist < minClusterDist) { - noImprovement = 0; - minClusterDist = currDist; - } - else { - noImprovement++; - } - if (currDiff < 1e-3 || noImprovement >= 5) break; - } - - args.ClearCounts(); - args.ClearDists(MaxDist); - currDist = 
KmeansAssign(indices, first, last, args, false); - memcpy(args.counts, args.newCounts, sizeof(int)*m_iBKTKmeansK); - - int numClusters = 0; - for (int i = 0; i < m_iBKTKmeansK; i++) if (args.counts[i] > 0) numClusters++; - - if (numClusters <= 1) { - //if (last - first > 1) std::cout << "large cluster:" << last - first << " dist:" << currDist << std::endl; - return numClusters; - } - args.Shuffle(indices, first, last); - return numClusters; - } -#pragma endregion - -#pragma region Build/Save neighborhood graph - template - bool Index::SaveRNG(std::string sGraphFilename) const - { - std::cout << "Save Graph To " << sGraphFilename << std::endl; - FILE *fp = fopen(sGraphFilename.c_str(), "wb"); - if (fp == NULL) return false; - fwrite(&m_iGraphSize, sizeof(int), 1, fp); - fwrite(&m_iNeighborhoodSize, sizeof(int), 1, fp); - - for (int i = 0; i < m_iGraphSize; i++) - { - fwrite((m_pNeighborhoodGraph)[i], sizeof(int), m_iNeighborhoodSize, fp); - } - fclose(fp); - std::cout << "Save Graph Finish!" 
<< std::endl; - return true; - } - - template - void Index::PartitionByTptree(std::vector& indices, - const int first, - const int last, - std::vector> & leaves) - { - if (last - first <= m_iTPTLeafSize) - { - leaves.push_back(std::make_pair(first, last)); - } - else - { - std::vector Mean(m_iDataDimension, 0); - - int iIteration = 100; - int end = min(first + m_iSamples, last); - int count = end - first + 1; - // calculate the mean of each dimension - for (int j = first; j <= end; j++) - { - T* v = (m_pSamples)[indices[j]]; - for (int k = 0; k < m_iDataDimension; k++) - { - Mean[k] += v[k]; - } - } - for (int k = 0; k < m_iDataDimension; k++) - { - Mean[k] /= count; - } - std::vector Variance; - Variance.reserve(m_iDataDimension); - for (int j = 0; j < m_iDataDimension; j++) - { - Variance.push_back(BasicResult(j, 0)); - } - // calculate the variance of each dimension - for (int j = first; j <= end; j++) - { - T* v = (m_pSamples)[indices[j]]; - for (int k = 0; k < m_iDataDimension; k++) - { - float dist = v[k] - Mean[k]; - Variance[k].Dist += dist*dist; - } - } - std::sort(Variance.begin(), Variance.end(), COMMON::Compare); - std::vector index(m_numTopDimensionTpTreeSplit); - std::vector weight(m_numTopDimensionTpTreeSplit), bestweight(m_numTopDimensionTpTreeSplit); - float bestvariance = Variance[m_iDataDimension - 1].Dist; - for (int i = 0; i < m_numTopDimensionTpTreeSplit; i++) - { - index[i] = Variance[m_iDataDimension - 1 - i].VID; - bestweight[i] = 0; - } - bestweight[0] = 1; - float bestmean = Mean[index[0]]; - - std::vector Val(count); - for (int i = 0; i < iIteration; i++) - { - float sumweight = 0; - for (int j = 0; j < m_numTopDimensionTpTreeSplit; j++) - { - weight[j] = float(rand() % 10000) / 5000.0f - 1.0f; - sumweight += weight[j] * weight[j]; - } - sumweight = sqrt(sumweight); - for (int j = 0; j < m_numTopDimensionTpTreeSplit; j++) - { - weight[j] /= sumweight; - } - float mean = 0; - for (int j = 0; j < count; j++) - { - Val[j] = 0; - for (int k 
= 0; k < m_numTopDimensionTpTreeSplit; k++) - { - Val[j] += weight[k] * (m_pSamples)[indices[first + j]][index[k]]; - } - mean += Val[j]; - } - mean /= count; - float var = 0; - for (int j = 0; j < count; j++) - { - float dist = Val[j] - mean; - var += dist * dist; - } - if (var > bestvariance) - { - bestvariance = var; - bestmean = mean; - for (int j = 0; j < m_numTopDimensionTpTreeSplit; j++) - { - bestweight[j] = weight[j]; - } - } - } - int i = first; - int j = last; - // decide which child one point belongs - while (i <= j) - { - float val = 0; - for (int k = 0; k < m_numTopDimensionTpTreeSplit; k++) - { - val += bestweight[k] * (m_pSamples)[indices[i]][index[k]]; - } - if (val < bestmean) - { - i++; - } - else - { - std::swap(indices[i], indices[j]); - j--; - } - } - // if all the points in the node are equal,equally split the node into 2 - if ((i == first) || (i == last + 1)) - { - i = (first + last + 1) / 2; - } - - Mean.clear(); - Variance.clear(); - Val.clear(); - index.clear(); - weight.clear(); - bestweight.clear(); - - PartitionByTptree(indices, first, i - 1, leaves); - PartitionByTptree(indices, i, last, leaves); - } - } - - template - void Index::RefineRNG() { - std::vector spaces(m_iNumberOfThreads); - for (int i = 0; i < m_iNumberOfThreads; i++) spaces[i].Initialize(m_iMaxCheckForRefineGraph, m_iGraphSize); - -#pragma omp parallel for schedule(dynamic) - for (int i = 0; i < m_iGraphSize; i++) - { - RefineRNGNode(i, spaces[omp_get_thread_num()], false); - } - } - - template - void Index::BuildRNG() - { - std::cout << "build RNG graph!" 
<< std::endl; - - omp_set_num_threads(m_iNumberOfThreads); - - int graphScale = 16; - int cefScale = 4; - m_iNeighborhoodSize *= graphScale; - m_iGraphSize = m_iDataSize; - - m_pNeighborhoodGraph.Initialize(m_iGraphSize, m_iNeighborhoodSize); - if (m_iGraphSize < 1000) { - std::memset(m_pNeighborhoodGraph.GetData(), -1, m_iGraphSize * m_iNeighborhoodSize * sizeof(int)); - m_iNeighborhoodSize /= graphScale; - RefineRNG(); - for (int i = 0; i < m_iGraphSize; i++) { - if (m_pSampleToCenter.find(-1 - i) != m_pSampleToCenter.end()) - m_pNeighborhoodGraph[i][m_iNeighborhoodSize - 1] = -2 - m_pSampleToCenter[-1 - i]; - } - std::cout << "Build RNG Graph end!" << std::endl; - return; - } - - { - COMMON::Dataset NeighborhoodDists(m_iGraphSize, m_iNeighborhoodSize); - std::vector> TptreeDataIndices(m_iTptreeNumber, std::vector(m_iGraphSize)); - std::vector>> TptreeLeafNodes(m_iTptreeNumber, std::vector>()); - for (int i = 0; i < m_iGraphSize; i++) - { - for (int j = 0; j < m_iNeighborhoodSize; j++) - { - (m_pNeighborhoodGraph)[i][j] = -1; - (NeighborhoodDists)[i][j] = MaxDist; - } - TptreeDataIndices[0][i] = i; - } - for (int i = 1; i < m_iTptreeNumber; i++) { - std::memcpy(TptreeDataIndices[i].data(), TptreeDataIndices[0].data(), sizeof(int) * m_iGraphSize); - } - - std::cout << "Parallel TpTree Partition begin " << std::endl; -#pragma omp parallel for schedule(dynamic) - for (int i = 0; i < m_iTptreeNumber; i++) - { - Sleep(i * 100); std::srand(clock()); - std::random_shuffle(TptreeDataIndices[i].begin(), TptreeDataIndices[i].end()); - PartitionByTptree(TptreeDataIndices[i], 0, m_iGraphSize - 1, TptreeLeafNodes[i]); - std::cout << "Finish Getting Leaves for Tree " << i << std::endl; - } - std::cout << "Parallel TpTree Partition done" << std::endl; - - for (int i = 0; i < m_iTptreeNumber; i++) - { -#pragma omp parallel for schedule(dynamic) - for (int j = 0; j < TptreeLeafNodes[i].size(); j++) - { - int start_index = TptreeLeafNodes[i][j].first; - int end_index = 
TptreeLeafNodes[i][j].second; - if (omp_get_thread_num() == 0) std::cout << "\rProcessing Tree " << i << ' ' << j * 100 / TptreeLeafNodes[i].size() << '%'; - for (int x = start_index; x < end_index; x++) - { - for (int y = x + 1; y <= end_index; y++) - { - int p1 = TptreeDataIndices[i][x]; - int p2 = TptreeDataIndices[i][y]; - float dist = m_fComputeDistance((m_pSamples)[p1], (m_pSamples)[p2], m_iDataDimension); - if (m_pSampleToCenter.find(p2) == m_pSampleToCenter.end()) - COMMON::Utils::AddNeighbor(p2, dist, (m_pNeighborhoodGraph)[p1], (NeighborhoodDists)[p1], m_iNeighborhoodSize); - else - COMMON::Utils::AddNeighbor(m_pSampleToCenter[p2], dist, (m_pNeighborhoodGraph)[p1], (NeighborhoodDists)[p1], m_iNeighborhoodSize); - if (m_pSampleToCenter.find(p1) == m_pSampleToCenter.end()) - COMMON::Utils::AddNeighbor(p1, dist, (m_pNeighborhoodGraph)[p2], (NeighborhoodDists)[p2], m_iNeighborhoodSize); - else - COMMON::Utils::AddNeighbor(m_pSampleToCenter[p1], dist, (m_pNeighborhoodGraph)[p2], (NeighborhoodDists)[p2], m_iNeighborhoodSize); - } - } - } - TptreeDataIndices[i].clear(); - TptreeLeafNodes[i].clear(); - std::cout << std::endl; - } - TptreeDataIndices.clear(); - TptreeLeafNodes.clear(); - - for (int i = 0; i < m_iDataSize; i++) { - if (m_pSampleToCenter.find(-1 - i) != m_pSampleToCenter.end()) { - BKTNode& tnode = m_pBKTRoots[m_pSampleToCenter[-1 - i]]; - for (int iter = -tnode.childStart; iter != tnode.childEnd; iter++) { - int node = m_pBKTRoots[iter].centerid; - for (int j = 0; j < m_iNeighborhoodSize; j++) { - int index = m_pNeighborhoodGraph[node][j]; - if (index == i) continue; - COMMON::Utils::AddNeighbor(index, NeighborhoodDists[node][j], (m_pNeighborhoodGraph)[i], (NeighborhoodDists)[i], m_iNeighborhoodSize); - } - } - } - } - } - std::cout << "NNG acc:" << GraphAccuracyEstimation(100, false) << std::endl; - if (m_iMaxCheckForRefineGraph > 0) { - m_iCEF *= cefScale; - m_iMaxCheckForRefineGraph *= cefScale; - RefineRNG(); - std::cout << "Refine RNG, graph 
acc:" << GraphAccuracyEstimation(100, true) << std::endl; - - m_iCEF /= cefScale; - m_iMaxCheckForRefineGraph /= cefScale; - m_iNeighborhoodSize /= graphScale; - - //RefineRNG(); - //std::cout << "Refine RNG, graph acc:" << GraphAccuracyEstimation(100, true) << std::endl; - RefineRNG(); - std::cout << "Refine RNG, graph acc:" << GraphAccuracyEstimation(100, true) << std::endl; - - for (int i = 0; i < m_iGraphSize; i++) { - if (m_pSampleToCenter.find(-1 - i) != m_pSampleToCenter.end()) - m_pNeighborhoodGraph[i][m_iNeighborhoodSize - 1] = -2 - m_pSampleToCenter[-1 - i]; - } - } - std::cout << "Build RNG Graph end!" << std::endl; - } - - template - float Index::GraphAccuracyEstimation(int NSample, bool rng) { - int* correct = new int[NSample]; - -#pragma omp parallel for schedule(dynamic) - for (int i = 0; i < NSample; i++) - { - int x = COMMON::Utils::rand_int(m_iGraphSize); - //int x = i; - COMMON::QueryResultSet query((m_pSamples)[x], m_iCEF); - for (int y = 0; y < m_iGraphSize; y++) - { - if (m_pSampleToCenter.find(y) != m_pSampleToCenter.end() || y == x) continue; - float dist = m_fComputeDistance(query.GetTarget(), (m_pSamples)[y], m_iDataDimension); - query.AddPoint(y, dist); - } - query.SortResult(); - int * exact_rng = new int[m_iNeighborhoodSize]; - if (rng) { - RebuildRNGNodeNeighbors(exact_rng, query.GetResults(), m_iCEF); - } - else { - for (int j = 0; j < m_iNeighborhoodSize && j < m_iCEF; j++) { - exact_rng[j] = query.GetResult(j)->VID; - } - for (int j = m_iCEF; j < m_iNeighborhoodSize; j++) exact_rng[j] = -1; - } - correct[i] = 0; - for (int j = 0; j < m_iNeighborhoodSize; j++) { - if (exact_rng[j] == -1) { - correct[i] += m_iNeighborhoodSize - j; - break; - } - for (int k = 0; k < m_iNeighborhoodSize; k++) - if ((m_pNeighborhoodGraph)[x][k] == exact_rng[j]) { - correct[i]++; - break; - } - } - delete[] exact_rng; - } - float acc = 0; - for (int i = 0; i < NSample; i++) acc += float(correct[i]); - acc = acc / NSample / m_iNeighborhoodSize; - delete[] 
correct; - return acc; + return ErrorCode::Success; } template @@ -966,136 +162,41 @@ namespace SPTAG { mkdir(folderPath.c_str()); } - tbb::concurrent_unordered_set deleted(m_deletedID.begin(), m_deletedID.end()); + + std::lock_guard lock(m_dataLock); + int newR = GetNumSamples(); + std::vector indices; - std::unordered_map old2new; - int newR = m_iDataSize; + std::vector reverseIndices(newR); for (int i = 0; i < newR; i++) { - if (deleted.find(i) == deleted.end()) { + if (m_deletedID.find(i) == m_deletedID.end()) { indices.push_back(i); - old2new[i] = i; + reverseIndices[i] = i; } else { - while (deleted.find(newR - 1) != deleted.end() && newR > i) newR--; + while (m_deletedID.find(newR - 1) != m_deletedID.end() && newR > i) newR--; if (newR == i) break; indices.push_back(newR - 1); - old2new[newR - 1] = i; + reverseIndices[newR - 1] = i; newR--; } } - old2new[-1] = -1; - std::cout << "Refine... from " << m_iDataSize << "->" << newR << std::endl; - std::ofstream vecOut(folderPath + m_sDataPointsFilename, std::ios::binary); - if (!vecOut.is_open()) return ErrorCode::FailedCreateFile; - vecOut.write((char*)&newR, sizeof(int)); - vecOut.write((char*)&m_iDataDimension, sizeof(int)); - for (int i = 0; i < newR; i++) { - vecOut.write((char*)m_pSamples[indices[i]], sizeof(T)*m_iDataDimension); - } - vecOut.close(); - - if (nullptr != m_pMetadata) - { - std::ofstream metaOut(folderPath + "metadata.bin_tmp", std::ios::binary); - std::ofstream metaIndexOut(folderPath + "metadataIndex.bin", std::ios::binary); - if (!metaOut.is_open() || !metaIndexOut.is_open()) return ErrorCode::FailedCreateFile; - metaIndexOut.write((char*)&newR, sizeof(int)); - std::uint64_t offset = 0; - for (int i = 0; i < newR; i++) { - metaIndexOut.write((char*)&offset, sizeof(std::uint64_t)); - ByteArray meta = m_pMetadata->GetMetadata(indices[i]); - metaOut.write((char*)meta.Data(), sizeof(uint8_t)*meta.Length()); - offset += meta.Length(); - } - metaOut.close(); - metaIndexOut.write((char*)&offset, 
sizeof(std::uint64_t)); - metaIndexOut.close(); - - SPTAG::MetadataSet::MetaCopy(folderPath + "metadata.bin_tmp", folderPath + "metadata.bin"); - } + std::cout << "Refine... from " << GetNumSamples() << "->" << newR << std::endl; - std::vector newRoot; - std::vector newStart; - std::vector tmpindices(indices.begin(), indices.end()); - BuildBKT(tmpindices, newStart, newRoot); + if (false == m_pSamples.Refine(indices, folderPath + m_sDataPointsFilename)) return ErrorCode::FailedCreateFile; + if (nullptr != m_pMetadata && ErrorCode::Success != m_pMetadata->RefineMetadata(indices, folderPath)) return ErrorCode::FailedCreateFile; + + COMMON::BKTree newTrees(m_pTrees); + newTrees.BuildTrees(this, &indices); #pragma omp parallel for - for (int i = 0; i < newRoot.size(); i++) { - newRoot[i].centerid = old2new[newRoot[i].centerid]; + for (int i = 0; i < newTrees.size(); i++) { + newTrees[i].centerid = reverseIndices[newTrees[i].centerid]; } - SaveBKT(folderPath + m_sBKTFilename, newStart, newRoot); - - std::ofstream graphOut(folderPath + m_sGraphFilename, std::ios::binary); - if (!graphOut.is_open()) return ErrorCode::FailedCreateFile; - graphOut.write((char*)&newR, sizeof(int)); - graphOut.write((char*)&m_iNeighborhoodSize, sizeof(int)); - - int *neighbors = new int[m_iNeighborhoodSize]; - COMMON::WorkSpace space; - space.Initialize(m_iMaxCheckForRefineGraph, m_iDataSize); - for (int i = 0; i < newR; i++) { - space.Reset(m_iMaxCheckForRefineGraph); - COMMON::QueryResultSet query((m_pSamples)[indices[i]], m_iCEF); - space.CheckAndSet(indices[i]); - for (int j = 0; j < m_iNeighborhoodSize; j++) { - int index = m_pNeighborhoodGraph[indices[i]][j]; - if (index < 0 || space.CheckAndSet(index)) continue; - space.m_NGQueue.insert(COMMON::HeapCell(index, m_fComputeDistance(query.GetTarget(), m_pSamples[index], m_iDataDimension))); - } - SearchIndex(query, space, deleted); - RebuildRNGNodeNeighbors(neighbors, query.GetResults(), m_iCEF); - for (int j = 0; j < m_iNeighborhoodSize; 
j++) - neighbors[j] = old2new[neighbors[j]]; - if (m_pSampleToCenter.find(-1 - indices[i]) != m_pSampleToCenter.end()) { - neighbors[m_iNeighborhoodSize - 1] = -2 - m_pSampleToCenter[-1 - indices[i]]; - } - graphOut.write((char*)neighbors, sizeof(int) * m_iNeighborhoodSize); - } - delete[]neighbors; - graphOut.close(); + newTrees.SaveTrees(folderPath + m_sBKTFilename); - return ErrorCode::Success; - } - - template - ErrorCode Index::MergeIndex(const char* p_indexFilePath1, const char* p_indexFilePath2) { - std::string folderPath1(p_indexFilePath1), folderPath2(p_indexFilePath2); - if (!folderPath1.empty() && *(folderPath1.rbegin()) != FolderSep) folderPath1 += FolderSep; - if (!folderPath2.empty() && *(folderPath2.rbegin()) != FolderSep) folderPath2 += FolderSep; - - Helper::IniReader p_configReader1, p_configReader2; - if (ErrorCode::Success != p_configReader1.LoadIniFile(folderPath1 + "/indexloader.ini")) - return ErrorCode::FailedOpenFile; - - if (ErrorCode::Success != p_configReader2.LoadIniFile(folderPath2 + "/indexloader.ini")) - return ErrorCode::FailedOpenFile; - - std::string empty(""); - if (!COMMON::DataUtils::MergeIndex(folderPath1 + p_configReader1.GetParameter("Index", "VectorFilePath", empty), - folderPath1 + p_configReader1.GetParameter("MetaData", "MetaDataFilePath", empty), - folderPath1 + p_configReader1.GetParameter("MetaData", "MetaDataIndexPath", empty), - folderPath2 + p_configReader1.GetParameter("Index", "VectorFilePath", empty), - folderPath2 + p_configReader1.GetParameter("MetaData", "MetaDataFilePath", empty), - folderPath2 + p_configReader1.GetParameter("MetaData", "MetaDataIndexPath", empty))) - return ErrorCode::Fail; - -#define DefineBKTParameter(VarName, VarType, DefaultValue, RepresentStr) \ - SetParameter(RepresentStr, \ - p_configReader1.GetParameter("Index", \ - RepresentStr, \ - std::string(#DefaultValue)).c_str()); \ - -#include "inc/Core/BKT/ParameterDefinitionList.h" -#undef DefineBKTParameter - - if 
(!LoadDataPoints(folderPath1 + p_configReader1.GetParameter("Index", "VectorFilePath", empty))) return ErrorCode::FailedOpenFile; - std::vector indices(m_iDataSize); - for (int i = 0; i < m_iDataSize; i++) indices[i] = i; - BuildBKT(indices, m_pBKTStart, m_pBKTRoots); - BuildRNG(); - - SaveBKT(folderPath1 + p_configReader1.GetParameter("Index", "TreeFilePath", empty), m_pBKTStart, m_pBKTRoots); - SaveRNG(folderPath1 + p_configReader1.GetParameter("Index", "GraphFilePath", empty)); + m_pGraph.RefineGraph(this, indices, reverseIndices, folderPath + m_sGraphFilename, + &(newTrees.GetSampleMap())); return ErrorCode::Success; } @@ -1104,10 +205,12 @@ namespace SPTAG const T* ptr_v = (const T*)p_vectors; #pragma omp parallel for schedule(dynamic) for (int i = 0; i < p_vectorNum; i++) { - COMMON::QueryResultSet query(ptr_v + i * m_iDataDimension, m_iCEF); + COMMON::QueryResultSet query(ptr_v + i * GetFeatureDim(), m_pGraph.m_iCEF); SearchIndex(query); - for (int i = 0; i < m_iCEF; i++) { + + for (int i = 0; i < m_pGraph.m_iCEF; i++) { if (query.GetResult(i)->Dist < 1e-6) { + std::lock_guard lock(m_dataLock); m_deletedID.insert(query.GetResult(i)->VID); } } @@ -1116,232 +219,79 @@ namespace SPTAG } template - ErrorCode Index::AddIndex(const void* p_vectors, int p_vectorNum, int p_dimension) { - if (m_pBKTRoots.size() == 0) { - return BuildIndex(p_vectors, p_vectorNum, p_dimension); - } - if (p_dimension != m_iDataDimension) return ErrorCode::FailedParseValue; - + ErrorCode Index::AddIndex(const void* p_vectors, int p_vectorNum, int p_dimension) + { int begin, end; { - std::lock_guard lock(m_dataAllocLock); - + std::lock_guard lock(m_dataLock); + + if (GetNumSamples() == 0) + return BuildIndex(p_vectors, p_vectorNum, p_dimension); + + if (p_dimension != GetFeatureDim()) + return ErrorCode::FailedParseValue; + + begin = GetNumSamples(); + end = GetNumSamples() + p_vectorNum; + m_pSamples.AddBatch((const T*)p_vectors, p_vectorNum); - 
m_pNeighborhoodGraph.AddBatch(p_vectorNum); + m_pGraph.AddBatch(p_vectorNum); - end = m_iDataSize + p_vectorNum; - if (m_pSamples.R() != end || m_pNeighborhoodGraph.R() != end) { + if (m_pSamples.R() != end || m_pGraph.R() != end) { std::cout << "Memory Error: Cannot alloc space for vectors" << std::endl; - m_pSamples.SetR(m_iDataSize); - m_pNeighborhoodGraph.SetR(m_iDataSize); + m_pSamples.SetR(begin); + m_pGraph.SetR(begin); return ErrorCode::Fail; } - begin = m_iDataSize; - m_iDataSize = end; - m_iGraphSize = end; - m_dataUpdateLock.resize(m_iDataSize); - } - if (DistCalcMethod::Cosine == m_iDistCalcMethod) - { - int base = COMMON::Utils::GetBase(); - for (int i = begin; i < end; i++) { - COMMON::Utils::Normalize((T*)m_pSamples[i], m_iDataDimension, base); + if (DistCalcMethod::Cosine == m_iDistCalcMethod) + { + int base = COMMON::Utils::GetBase(); + for (int i = begin; i < end; i++) { + COMMON::Utils::Normalize((T*)m_pSamples[i], GetFeatureDim(), base); + } } } - auto space = m_workSpacePool->Rent(); for (int node = begin; node < end; node++) { - RefineRNGNode(node, *(space.get()), true); + m_pGraph.RefineNode(this, node, true); } - m_workSpacePool->Return(space); std::cout << "Add " << p_vectorNum << " vectors" << std::endl; return ErrorCode::Success; } - template - void Index::RefineRNGNode(const int node, COMMON::WorkSpace &space, bool updateNeighbors) { - space.Reset(m_iMaxCheckForRefineGraph); - COMMON::QueryResultSet query((m_pSamples)[node], m_iCEF); - space.CheckAndSet(node); - for (int i = 0; i < m_iNeighborhoodSize; i++) { - int index = m_pNeighborhoodGraph[node][i]; - if (index < 0 || space.CheckAndSet(index)) continue; - space.m_NGQueue.insert(COMMON::HeapCell(index, m_fComputeDistance(query.GetTarget(), m_pSamples[index], m_iDataDimension))); - } - SearchIndex(query, space, m_deletedID); - RebuildRNGNodeNeighbors(m_pNeighborhoodGraph[node], query.GetResults(), m_iCEF); - - if (updateNeighbors) { - // update neighbors - for (int j = 0; j < m_iCEF; 
j++) - { - BasicResult* item = query.GetResult(j); - if (item->VID < 0) break; - - int insertID = node; - int* nodes = m_pNeighborhoodGraph[item->VID]; - std::lock_guard lock(m_dataUpdateLock[item->VID]); - for (int k = 0; k < m_iNeighborhoodSize; k++) - { - int tmpNode = nodes[k]; - if (tmpNode < -1) continue; - - if (tmpNode < 0) - { - bool good = true; - for (int t = 0; t < k; t++) { - if (m_fComputeDistance((m_pSamples)[insertID], (m_pSamples)[nodes[t]], m_iDataDimension) < item->Dist) { - good = false; - break; - } - } - if (good) { - nodes[k] = insertID; - } - break; - } - float tmpDist = m_fComputeDistance(m_pSamples[item->VID], m_pSamples[tmpNode], m_iDataDimension); - if (item->Dist < tmpDist || (item->Dist == tmpDist && insertID < tmpNode)) - { - bool good = true; - for (int t = 0; t < k; t++) { - if (m_fComputeDistance((m_pSamples)[insertID], (m_pSamples)[nodes[t]], m_iDataDimension) < item->Dist) { - good = false; - break; - } - } - if (good) { - nodes[k] = insertID; - insertID = tmpNode; - item->Dist = tmpDist; - } - else { - break; - } - } - } - } - } - } - - template - void Index::RebuildRNGNodeNeighbors(int* nodes, const BasicResult* queryResults, int numResults) { - int count = 0; - for (int j = 0; j < numResults && count < m_iNeighborhoodSize; j++) { - const BasicResult& item = queryResults[j]; - if (item.VID < 0) continue; - - bool good = true; - for (int k = 0; k < count; k++) { - if (m_fComputeDistance((m_pSamples)[nodes[k]], (m_pSamples)[item.VID], m_iDataDimension) <= item.Dist) { - good = false; - break; - } - } - if (good) nodes[count++] = item.VID; - } - for (int j = count; j < m_iNeighborhoodSize; j++) nodes[j] = -1; - } - - template - bool Index::SaveDataPoints(std::string sDataPointsFileName) - { - std::cout << "Save Data Points To " << sDataPointsFileName << std::endl; - - FILE * fp = fopen(sDataPointsFileName.c_str(), "wb"); - if (fp == NULL) return false; - - int R = m_pSamples.R(), C = m_pSamples.C(); - fwrite(&R, sizeof(int), 1, 
fp); - fwrite(&C, sizeof(int), 1, fp); - - // write point one by one in case for cache miss - for (int i = 0; i < R; i++) { - fwrite((m_pSamples)[i], sizeof(T), C, fp); - } - fclose(fp); - - std::cout << "Save Data Points (" << m_pSamples.R() << ", " << m_pSamples.C() << ") Finish!" << std::endl; - return true; - } - template ErrorCode - Index::SaveIndex(const std::string& p_folderPath) + Index::SaveIndex(const std::string& p_folderPath, std::ofstream& p_configout) { - std::string folderPath(p_folderPath); - if (!folderPath.empty() && *(folderPath.rbegin()) != FolderSep) - { - folderPath += FolderSep; - } - - if (!direxists(folderPath.c_str())) - { - mkdir(folderPath.c_str()); - } - - std::string loaderFilePath = folderPath + "indexloader.ini"; - - std::ofstream loaderFile(loaderFilePath); - if (!loaderFile.is_open()) - { - return ErrorCode::FailedCreateFile; - } - m_sDataPointsFilename = "vectors.bin"; m_sBKTFilename = "tree.bin"; m_sGraphFilename = "graph.bin"; - std::string metadataFile = "metadata.bin"; - std::string metadataIndexFile = "metadataIndex.bin"; - - loaderFile << "[Index]" << std::endl; - loaderFile << "IndexAlgoType=" << Helper::Convert::ConvertToString(IndexAlgoType::BKT) << std::endl; - loaderFile << "ValueType=" << Helper::Convert::ConvertToString(GetEnumValueType()) << std::endl; - loaderFile << std::endl; #define DefineBKTParameter(VarName, VarType, DefaultValue, RepresentStr) \ - loaderFile << RepresentStr << "=" << GetParameter(RepresentStr) << std::endl; + p_configout << RepresentStr << "=" << GetParameter(RepresentStr) << std::endl; #include "inc/Core/BKT/ParameterDefinitionList.h" #undef DefineBKTParameter - loaderFile << std::endl; - - if (nullptr != m_pMetadata) - { - loaderFile << "[MetaData]" << std::endl; - loaderFile << "MetaDataFilePath=" << metadataFile << std::endl; - loaderFile << "MetaDataIndexPath=" << metadataIndexFile << std::endl; - loaderFile << std::endl; - } - loaderFile.close(); + p_configout << std::endl; if 
(m_deletedID.size() > 0) { - RefineIndex(folderPath); + RefineIndex(p_folderPath); } - else { - if (!SaveDataPoints(folderPath + m_sDataPointsFilename)) return ErrorCode::Fail; - if (!SaveBKT(folderPath + m_sBKTFilename, m_pBKTStart, m_pBKTRoots)) return ErrorCode::Fail; - if (!SaveRNG(folderPath + m_sGraphFilename)) return ErrorCode::Fail; - if (nullptr != m_pMetadata) - { - m_pMetadata->SaveMetadata(folderPath + metadataFile, folderPath + metadataIndexFile); - } + else { + if (!m_pSamples.Save(p_folderPath + m_sDataPointsFilename)) return ErrorCode::Fail; + if (!m_pTrees.SaveTrees(p_folderPath + m_sBKTFilename)) return ErrorCode::Fail; + if (!m_pGraph.SaveGraph(p_folderPath + m_sGraphFilename)) return ErrorCode::Fail; } return ErrorCode::Success; } -#pragma endregion -#pragma endregion template ErrorCode Index::SetParameter(const char* p_param, const char* p_value) { - if (nullptr == p_param || nullptr == p_value) - { - return ErrorCode::Fail; - } + if (nullptr == p_param || nullptr == p_value) return ErrorCode::Fail; #define DefineBKTParameter(VarName, VarType, DefaultValue, RepresentStr) \ else if (SPTAG::Helper::StrUtils::StrEqualIgnoreCase(p_param, RepresentStr)) \ @@ -1366,10 +316,7 @@ namespace SPTAG std::string Index::GetParameter(const char* p_param) const { - if (nullptr == p_param) - { - return std::string(); - } + if (nullptr == p_param) return std::string(); #define DefineBKTParameter(VarName, VarType, DefaultValue, RepresentStr) \ else if (SPTAG::Helper::StrUtils::StrEqualIgnoreCase(p_param, RepresentStr)) \ diff --git a/AnnService/src/Core/Common/NeighborhoodGraph.cpp b/AnnService/src/Core/Common/NeighborhoodGraph.cpp new file mode 100644 index 00000000..20ed8630 --- /dev/null +++ b/AnnService/src/Core/Common/NeighborhoodGraph.cpp @@ -0,0 +1,14 @@ +#include "inc/Core/Common/NeighborhoodGraph.h" +#include "inc/Core/Common/RelativeNeighborhoodGraph.h" + +using namespace SPTAG::COMMON; + +std::shared_ptr NeighborhoodGraph::CreateInstance(std::string 
type) +{ + std::shared_ptr res; + if (type == "RNG") + { + res.reset(new RelativeNeighborhoodGraph); + } + return res; +} \ No newline at end of file diff --git a/AnnService/src/Core/KDT/KDTIndex.cpp b/AnnService/src/Core/KDT/KDTIndex.cpp index a6db67d1..00bb877f 100644 --- a/AnnService/src/Core/KDT/KDTIndex.cpp +++ b/AnnService/src/Core/KDT/KDTIndex.cpp @@ -1,9 +1,4 @@ #include "inc/Core/KDT/Index.h" -#include "inc/Core/Common/WorkSpacePool.h" -#include "inc/Core/MetadataSet.h" -#include "inc/Helper/StringConvert.h" -#include "inc/Helper/CommonHelper.h" -#include "inc/Helper/SimpleIniReader.h" #pragma warning(disable:4996) // 'fopen': This function or variable may be unsafe. Consider using fopen_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. #pragma warning(disable:4242) // '=' : conversion from 'int' to 'short', possible loss of data @@ -14,322 +9,83 @@ namespace SPTAG { namespace KDT { -#pragma region Load data points, kd-tree, neighborhood graph template ErrorCode Index::LoadIndexFromMemory(const std::vector& p_indexBlobs) { - if (!LoadDataPoints((char*)p_indexBlobs[0])) return ErrorCode::FailedParseValue; - if (!LoadKDT((char*)p_indexBlobs[1])) return ErrorCode::FailedParseValue; - if (!LoadGraph((char*)p_indexBlobs[2])) return ErrorCode::FailedParseValue; + if (!m_pSamples.Load((char*)p_indexBlobs[0])) return ErrorCode::FailedParseValue; + if (!m_pTrees.LoadTrees((char*)p_indexBlobs[1])) return ErrorCode::FailedParseValue; + if (!m_pGraph.LoadGraph((char*)p_indexBlobs[2])) return ErrorCode::FailedParseValue; return ErrorCode::Success; } template - ErrorCode Index::LoadIndex(const std::string& p_folderPath) + ErrorCode Index::LoadIndex(const std::string& p_folderPath, Helper::IniReader& p_reader) { - std::string folderPath(p_folderPath); - if (!folderPath.empty() && *(folderPath.rbegin()) != FolderSep) - { - folderPath += FolderSep; - } - - Helper::IniReader p_configReader; - if (ErrorCode::Success != 
p_configReader.LoadIniFile(folderPath + "/indexloader.ini")) - { - return ErrorCode::FailedOpenFile; - } - - std::string metadataSection("MetaData"); - if (p_configReader.DoesSectionExist(metadataSection)) - { - std::string metadataFilePath = p_configReader.GetParameter(metadataSection, - "MetaDataFilePath", - std::string()); - std::string metadataIndexFilePath = p_configReader.GetParameter(metadataSection, - "MetaDataIndexPath", - std::string()); - - m_pMetadata.reset(new FileMetadataSet(folderPath + metadataFilePath, folderPath + metadataIndexFilePath)); - - if (!m_pMetadata->Available()) - { - std::cerr << "Error: Failed to load metadata." << std::endl; - return ErrorCode::Fail; - } - } - #define DefineKDTParameter(VarName, VarType, DefaultValue, RepresentStr) \ SetParameter(RepresentStr, \ - p_configReader.GetParameter("Index", \ - RepresentStr, \ - std::string(#DefaultValue)).c_str()); \ + p_reader.GetParameter("Index", \ + RepresentStr, \ + std::string(#DefaultValue)).c_str()); \ #include "inc/Core/KDT/ParameterDefinitionList.h" #undef DefineKDTParameter - if (DistCalcMethod::Undefined == m_iDistCalcMethod) - { - return ErrorCode::Fail; - } - - if (!LoadDataPoints(folderPath + m_sDataPointsFilename)) return ErrorCode::Fail; - if (!LoadKDT(folderPath + m_sKDTFilename)) return ErrorCode::Fail; - if (!LoadGraph(folderPath + m_sGraphFilename)) return ErrorCode::Fail; - - m_iDataSize = m_pSamples.R(); - m_iDataDimension = m_pSamples.C(); - m_dataUpdateLock.resize(m_iDataSize); + if (!m_pSamples.Load(p_folderPath + m_sDataPointsFilename)) return ErrorCode::Fail; + if (!m_pTrees.LoadTrees(p_folderPath + m_sKDTFilename)) return ErrorCode::Fail; + if (!m_pGraph.LoadGraph(p_folderPath + m_sGraphFilename)) return ErrorCode::Fail; m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples())); m_workSpacePool->Init(m_iNumberOfThreads); return ErrorCode::Success; } - template - bool Index::LoadDataPoints(std::string sDataPointsFileName) - { - std::cout << 
"Load Data Points From " << sDataPointsFileName << std::endl; - FILE * fp = fopen(sDataPointsFileName.c_str(), "rb"); - if (fp == NULL) return false; - - int R, C; - fread(&R, sizeof(int), 1, fp); - fread(&C, sizeof(int), 1, fp); - - if (m_iDebugLoad > 0 && R > m_iDebugLoad) R = m_iDebugLoad; - - m_pSamples.Initialize(R, C); - int i = 0, batch = 10000; - while (i + batch < R) { - fread((m_pSamples)[i], sizeof(T), C * batch, fp); - i += batch; - } - fread((m_pSamples)[i], sizeof(T), C * (R - i), fp); - fclose(fp); - std::cout << "Load Data Points (" << m_pSamples.R() << ", " << m_pSamples.C() << ") Finish!" << std::endl; - return true; - } - - // Functions for loading models from memory mapped files - template - bool Index::LoadDataPoints(char* pDataPointsMemFile) - { - int R, C; - R = *((int*)pDataPointsMemFile); - pDataPointsMemFile += sizeof(int); - - C = *((int*)pDataPointsMemFile); - pDataPointsMemFile += sizeof(int); - - m_pSamples.Initialize(R, C, (T*)pDataPointsMemFile); - - return true; - } +#pragma region K-NN search - template - bool Index::LoadKDT(std::string sKDTFilename) - { - std::cout << "Load KDT From " << sKDTFilename << std::endl; - FILE *fp = fopen(sKDTFilename.c_str(), "rb"); - if (fp == NULL) return false; - int realKDTNumber; - fread(&realKDTNumber, sizeof(int), 1, fp); - if (realKDTNumber < m_iKDTNumber) m_iKDTNumber = realKDTNumber; - m_pKDTStart.resize(m_iKDTNumber + 1, -1); - for (int i = 0; i < m_iKDTNumber; i++) { - int treeNodeSize; - fread(&treeNodeSize, sizeof(int), 1, fp); - if (treeNodeSize > 0) { - m_pKDTStart[i] = (int)(m_pKDTRoots.size()); - m_pKDTRoots.resize(m_pKDTRoots.size() + treeNodeSize); - fread(&(m_pKDTRoots[m_pKDTStart[i]]), sizeof(KDTNode), treeNodeSize, fp); - } - } - if (m_pKDTRoots.size() > 0) m_pKDTStart[m_iKDTNumber] = (int)(m_pKDTRoots.size()); - fclose(fp); - std::cout << "Load KDT (" << m_iKDTNumber << ", " << m_pKDTRoots.size() << ") Finish!" 
<< std::endl; - return true; - } +#define Search(CheckDeleted1) \ + m_pTrees.InitSearchTrees(this, p_query, p_space, m_iNumberOfInitialDynamicPivots); \ + while (!p_space.m_NGQueue.empty()) { \ + COMMON::HeapCell gnode = p_space.m_NGQueue.pop(); \ + const int *node = m_pGraph[gnode.node]; \ + _mm_prefetch((const char *)node, _MM_HINT_T0); \ + CheckDeleted1 { \ + if (!p_query.AddPoint(gnode.node, gnode.distance) && p_space.m_iNumberOfCheckedLeaves > p_space.m_iMaxCheck) { \ + p_query.SortResult(); return; \ + } \ + } \ + for (int i = 0; i < m_pGraph.m_iNeighborhoodSize; i++) \ + _mm_prefetch((const char *)(m_pSamples)[node[i]], _MM_HINT_T0); \ + bool bLocalOpt = true; \ + for (int i = 0; i < m_pGraph.m_iNeighborhoodSize; i++) { \ + int nn_index = node[i]; \ + if (nn_index < 0) break; \ + if (p_space.CheckAndSet(nn_index)) continue; \ + float distance2leaf = m_fComputeDistance(p_query.GetTarget(), (m_pSamples)[nn_index], GetFeatureDim()); \ + if (distance2leaf <= p_query.worstDist() || distance2leaf < gnode.distance) bLocalOpt = false; \ + p_space.m_iNumberOfCheckedLeaves++; \ + p_space.m_NGQueue.insert(COMMON::HeapCell(nn_index, distance2leaf)); \ + } \ + if (bLocalOpt) p_space.m_iNumOfContinuousNoBetterPropagation++; \ + else p_space.m_iNumOfContinuousNoBetterPropagation = 0; \ + if (p_space.m_iNumOfContinuousNoBetterPropagation > m_iThresholdOfNumberOfContinuousNoBetterPropagation) { \ + if (p_space.m_iNumberOfTreeCheckedLeaves <= p_space.m_iNumberOfCheckedLeaves / 10) { \ + m_pTrees.SearchTrees(this, p_query, p_space, m_iNumberOfOtherDynamicPivots + p_space.m_iNumberOfCheckedLeaves); \ + } else if (gnode.distance > p_query.worstDist()) { \ + break; \ + } \ + } \ + } \ + p_query.SortResult(); \ template - bool Index::LoadKDT(char* pKDTMemFile) + void Index::SearchIndexWithDeleted(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space, const tbb::concurrent_unordered_set &p_deleted) const { - int realKDTNumber = *((int*)pKDTMemFile); - pKDTMemFile += 
sizeof(int); - if (realKDTNumber < m_iKDTNumber) m_iKDTNumber = realKDTNumber; - m_pKDTStart.clear(); - for (int i = 0; i < m_iKDTNumber; i++) { - m_pKDTStart.push_back((int)(m_pKDTRoots.size())); - - int treeNodeSize = *((int*)pKDTMemFile); - pKDTMemFile += sizeof(int); - m_pKDTRoots.resize(m_pKDTRoots.size() + treeNodeSize); - std::memcpy(&(m_pKDTRoots[m_pKDTStart[i]]), pKDTMemFile, sizeof(KDTNode)*treeNodeSize); - pKDTMemFile += sizeof(KDTNode)*treeNodeSize; - } - m_pKDTStart.push_back((int)(m_pKDTRoots.size())); - return true; + Search(if (p_deleted.find(gnode.node) == p_deleted.end())) } template - bool Index::LoadGraph(std::string sGraphFilename) + void Index::SearchIndexWithoutDeleted(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space) const { - std::cout << "Load Graph From " << sGraphFilename << std::endl; - FILE * fp = fopen(sGraphFilename.c_str(), "rb"); - if (fp == NULL) return false; - fread(&m_iGraphSize, sizeof(int), 1, fp); - int KNNinGraph; - fread(&KNNinGraph, sizeof(int), 1, fp); - if (KNNinGraph < m_iNeighborhoodSize) m_iNeighborhoodSize = KNNinGraph; - - m_pNeighborhoodGraph.Initialize(m_iGraphSize, m_iNeighborhoodSize); - - std::vector unusedData(KNNinGraph); - for (int i = 0; i < m_iGraphSize; i++) - { - fread((m_pNeighborhoodGraph)[i], sizeof(int), m_iNeighborhoodSize, fp); - if (m_iNeighborhoodSize < KNNinGraph) - { - fread(&unusedData[0], sizeof(int), KNNinGraph - m_iNeighborhoodSize, fp); - } - } - fclose(fp); - std::cout << "Load Graph (" << m_iGraphSize << "," << m_iNeighborhoodSize << ") Finish!" << std::endl; - return true; - } - - template - bool Index::LoadGraph(char* pGraphMemFile) { - m_iGraphSize = *((int*)pGraphMemFile); - pGraphMemFile += sizeof(int); - - int KNNinGraph = *((int*)pGraphMemFile); - pGraphMemFile += sizeof(int); - - // In the memory mapped file mode, we'll not accept NeighborhoodSize in graph file that's larger than expected size (m_iNeighborhoodSize) - // as we don't want to make another copy to fit. 
- if (KNNinGraph > m_iNeighborhoodSize) return false; - - if (KNNinGraph < m_iNeighborhoodSize) m_iNeighborhoodSize = KNNinGraph; - - m_pNeighborhoodGraph.Initialize(m_iGraphSize, m_iNeighborhoodSize, (int*)pGraphMemFile); - - return true; - } -#pragma endregion - -#pragma region K-NN search - template - void Index::KDTSearch(const int node, const bool isInit, const float distBound, - COMMON::WorkSpace& p_space, COMMON::QueryResultSet &p_query, const tbb::concurrent_unordered_set &p_deleted) const { - if (node < 0) - { - int index = -node - 1; - if (index >= m_iDataSize) return; - -#ifdef PREFETCH - const char* data = (const char *)(m_pSamples[index]); - _mm_prefetch(data, _MM_HINT_T0); - _mm_prefetch(data + 64, _MM_HINT_T0); -#endif - if (p_space.CheckAndSet(index)) return; - - float distance = m_fComputeDistance(p_query.GetTarget(), (T*)data, m_iDataDimension); - if (p_deleted.find(index) == p_deleted.end()) p_query.AddPoint(index, distance); - ++p_space.m_iNumberOfTreeCheckedLeaves; - ++p_space.m_iNumberOfCheckedLeaves; - p_space.m_NGQueue.insert(COMMON::HeapCell(index, distance)); - return; - } - - auto& tnode = m_pKDTRoots[node]; - - float diff = (p_query.GetTarget())[tnode.split_dim] - tnode.split_value; - float distanceBound = distBound + diff * diff; - int otherChild, bestChild; - if (diff < 0) - { - bestChild = tnode.left; - otherChild = tnode.right; - } - else - { - otherChild = tnode.left; - bestChild = tnode.right; - } - - if (!isInit || distanceBound < p_query.worstDist()) - { - p_space.m_SPTQueue.insert(COMMON::HeapCell(otherChild, distanceBound)); - } - KDTSearch(bestChild, isInit, distBound, p_space, p_query, p_deleted); - } - - template - void Index::SearchIndex(COMMON::QueryResultSet &p_query, COMMON::WorkSpace &p_space, const tbb::concurrent_unordered_set &p_deleted) const - { - for (char i = 0; i < m_iKDTNumber; i++) { - KDTSearch(m_pKDTStart[i], true, 0, p_space, p_query, p_deleted); - } - - while (!p_space.m_SPTQueue.empty() && 
p_space.m_iNumberOfCheckedLeaves < g_iNumberOfInitialDynamicPivots) - { - auto& tcell = p_space.m_SPTQueue.pop(); - if (p_query.worstDist() < tcell.distance) break; - KDTSearch(tcell.node, true, tcell.distance, p_space, p_query, p_deleted); - } - - while (!p_space.m_NGQueue.empty()) { - bool bLocalOpt = true; - COMMON::HeapCell gnode = p_space.m_NGQueue.pop(); - const int *node = (m_pNeighborhoodGraph)[gnode.node]; - -#ifdef PREFETCH - _mm_prefetch((const char *)node, _MM_HINT_T0); - for (int i = 0; i < m_iNeighborhoodSize; i++) - { - _mm_prefetch((const char *)(m_pSamples)[node[i]], _MM_HINT_T0); - } -#endif - - for (int i = 0; i < m_iNeighborhoodSize; i++) - { - int nn_index = node[i]; - - // do not check it if it has been checked - if (nn_index < 0) break; - if (p_space.CheckAndSet(nn_index)) continue; - - // count the number of the computed nodes - float distance2leaf = m_fComputeDistance(p_query.GetTarget(), (m_pSamples)[nn_index], m_iDataDimension); - - if (p_deleted.find(nn_index) == p_deleted.end()) p_query.AddPoint(nn_index, distance2leaf); - if (distance2leaf <= p_query.worstDist()|| distance2leaf < gnode.distance) bLocalOpt = false; - p_space.m_iNumberOfCheckedLeaves++; - p_space.m_NGQueue.insert(COMMON::HeapCell(nn_index, distance2leaf)); - } - - if (bLocalOpt) p_space.m_iNumOfContinuousNoBetterPropagation++; - else p_space.m_iNumOfContinuousNoBetterPropagation = 0; - - if (p_space.m_iNumOfContinuousNoBetterPropagation > g_iThresholdOfNumberOfContinuousNoBetterPropagation) - { - if (p_space.m_iNumberOfTreeCheckedLeaves < p_space.m_iNumberOfCheckedLeaves / 10) - { - int nextNumberOfCheckedLeaves = g_iNumberOfOtherDynamicPivots + p_space.m_iNumberOfCheckedLeaves; - while (!p_space.m_SPTQueue.empty() && p_space.m_iNumberOfCheckedLeaves < nextNumberOfCheckedLeaves) - { - auto& tcell = p_space.m_SPTQueue.pop(); - KDTSearch(tcell.node, false, tcell.distance, p_space, p_query, p_deleted); - } - } - else if (gnode.distance > p_query.worstDist()) { - break; - } 
- } - if (p_space.m_iNumberOfCheckedLeaves >= p_space.m_iMaxCheck) break; - } - p_query.SortResult(); + Search(;) } template @@ -339,7 +95,11 @@ namespace SPTAG auto workSpace = m_workSpacePool->Rent(); workSpace->Reset(m_iMaxCheck); - SearchIndex(*((COMMON::QueryResultSet*)&p_query), *workSpace, m_deletedID); + if (m_deletedID.size() > 0) + SearchIndexWithDeleted(*((COMMON::QueryResultSet*)&p_query), *workSpace, m_deletedID); + else + SearchIndexWithoutDeleted(*((COMMON::QueryResultSet*)&p_query), *workSpace); + m_workSpacePool->Return(workSpace); if (p_query.WithMeta() && nullptr != m_pMetadata) @@ -350,523 +110,33 @@ namespace SPTAG p_query.SetMetadata(i, (result < 0) ? ByteArray::c_empty : m_pMetadata->GetMetadata(result)); } } - return ErrorCode::Success; } #pragma endregion -#pragma region Build/Save kd-tree & neighborhood graphs template ErrorCode Index::BuildIndex(const void* p_data, int p_vectorNum, int p_dimension) { - m_pSamples.Initialize(p_vectorNum, p_dimension); - std::memcpy(m_pSamples.GetData(), p_data, p_vectorNum * p_dimension * sizeof(T)); - m_iDataSize = m_pSamples.R(); - m_iDataDimension = m_pSamples.C(); - m_dataUpdateLock.resize(m_iDataSize); + omp_set_num_threads(m_iNumberOfThreads); + + m_pSamples.Initialize(p_vectorNum, p_dimension, (T*)p_data, false); if (DistCalcMethod::Cosine == m_iDistCalcMethod) { int base = COMMON::Utils::GetBase(); - for (int i = 0; i < m_iDataSize; i++) { - COMMON::Utils::Normalize(m_pSamples[i], m_iDataDimension, base); +#pragma omp parallel for + for (int i = 0; i < GetNumSamples(); i++) { + COMMON::Utils::Normalize(m_pSamples[i], GetFeatureDim(), base); } } - std::vector indices(m_iDataSize); - for (int j = 0; j < m_iDataSize; j++) indices[j] = j; - BuildKDT(indices, m_pKDTStart, m_pKDTRoots); - BuildRNG(); m_workSpacePool.reset(new COMMON::WorkSpacePool(m_iMaxCheck, GetNumSamples())); m_workSpacePool->Init(m_iNumberOfThreads); - return ErrorCode::Success; - } - -#pragma region Build/Save kd-tree - template - 
bool Index::SaveKDT(std::string sKDTFilename, std::vector& newStart, std::vector& newRoot) const - { - std::cout << "Save KDT to " << sKDTFilename << std::endl; - FILE *fp = fopen(sKDTFilename.c_str(), "wb"); - if (fp == NULL) return false; - fwrite(&m_iKDTNumber, sizeof(int), 1, fp); - for (int i = 0; i < m_iKDTNumber; i++) - { - int treeNodeSize = newStart[i + 1] - newStart[i]; - fwrite(&treeNodeSize, sizeof(int), 1, fp); - if (treeNodeSize > 0) fwrite(&(newRoot[newStart[i]]), sizeof(KDTNode), treeNodeSize, fp); - } - fclose(fp); - std::cout << "Save KDT Finish!" << std::endl; - return true; - } - - template - void Index::BuildKDT(std::vector& indices, std::vector& newStart, std::vector& newRoot) - { - omp_set_num_threads(m_iNumberOfThreads); - newRoot.resize(m_iKDTNumber * indices.size()); - if (indices.size() > 0) - newStart.resize(m_iKDTNumber + 1, (int)(newRoot.size())); - else - { - newStart.resize(m_iKDTNumber + 1, -1); - return; - } -#pragma omp parallel for - for (int i = 0; i < m_iKDTNumber; i++) - { - Sleep(i * 100); std::srand(clock()); - - std::vector pindices(indices.begin(), indices.end()); - std::random_shuffle(pindices.begin(), pindices.end()); - - newStart[i] = i * (int)pindices.size(); - std::cout << "Start to build tree " << i + 1 << std::endl; - int iTreeSize = newStart[i]; - DivideTree(newRoot.data(), pindices, 0, (int)pindices.size() - 1, newStart[i], iTreeSize); - std::cout << i + 1 << " trees built, " << iTreeSize - newStart[i] << " " << pindices.size() << std::endl; - } - } - - template - void Index::DivideTree(KDTNode* pTree, std::vector& indices, int first, int last, - int index, int &iTreeSize) { - ChooseDivision(pTree[index], indices, first, last); - int i = Subdivide(pTree[index], indices, first, last); - if (i - 1 <= first) - { - pTree[index].left = -indices[first] - 1; - } - else - { - iTreeSize++; - pTree[index].left = iTreeSize; - DivideTree(pTree, indices, first, i - 1, iTreeSize, iTreeSize); - } - if (last == i) - { - 
pTree[index].right = -indices[last] - 1; - } - else - { - iTreeSize++; - pTree[index].right = iTreeSize; - DivideTree(pTree, indices, i, last, iTreeSize, iTreeSize); - } - } - - template - void Index::ChooseDivision(KDTNode& node, const std::vector& indices, int first, int last) - { - std::vector meanValues(m_iDataDimension, 0); - std::vector varianceValues(m_iDataDimension, 0); - int end = min(first + m_numSamplesKDTSplitConsideration, last); - int count = end - first + 1; - // calculate the mean of each dimension - for (int j = first; j <= end; j++) - { - T* v = (m_pSamples)[indices[j]]; - for (int k = 0; k < m_iDataDimension; k++) - { - meanValues[k] += v[k]; - } - } - for (int k = 0; k < m_iDataDimension; k++) - { - meanValues[k] /= count; - } - // calculate the variance of each dimension - for (int j = first; j <= end; j++) - { - T* v = (m_pSamples)[indices[j]]; - for (int k = 0; k < m_iDataDimension; k++) - { - float dist = v[k] - meanValues[k]; - varianceValues[k] += dist*dist; - } - } - // choose the split dimension as one of the dimension inside TOP_DIM maximum variance - node.split_dim = SelectDivisionDimension(varianceValues); - // determine the threshold - node.split_value = meanValues[node.split_dim]; - } - - template - int Index::SelectDivisionDimension(const std::vector& varianceValues) const - { - // Record the top maximum variances - std::vector topind(m_numTopDimensionKDTSplit); - int num = 0; - // order the variances - for (int i = 0; i < m_iDataDimension; i++) - { - if (num < m_numTopDimensionKDTSplit || varianceValues[i] > varianceValues[topind[num - 1]]) - { - if (num < m_numTopDimensionKDTSplit) - { - topind[num++] = i; - } - else - { - topind[num - 1] = i; - } - int j = num - 1; - // order the TOP_DIM variances - while (j > 0 && varianceValues[topind[j]] > varianceValues[topind[j - 1]]) - { - std::swap(topind[j], topind[j - 1]); - j--; - } - } - } - // randomly choose a dimension from TOP_DIM - return topind[COMMON::Utils::rand_int(num)]; - 
} - - template - int Index::Subdivide(const KDTNode& node, std::vector& indices, const int first, const int last) - { - int i = first; - int j = last; - // decide which child one point belongs - while (i <= j) - { - int ind = indices[i]; - float val = (m_pSamples)[ind][node.split_dim]; - if (val < node.split_value) - { - i++; - } - else - { - std::swap(indices[i], indices[j]); - j--; - } - } - // if all the points in the node are equal,equally split the node into 2 - if ((i == first) || (i == last + 1)) - { - i = (first + last + 1) / 2; - } - return i; - } -#pragma endregion - -#pragma region Build/Save neighborhood graph - template - bool Index::SaveRNG(std::string sGraphFilename) const - { - std::cout << "Save Graph To " << sGraphFilename << std::endl; - FILE *fp = fopen(sGraphFilename.c_str(), "wb"); - if (fp == NULL) return false; - fwrite(&m_iGraphSize, sizeof(int), 1, fp); - fwrite(&m_iNeighborhoodSize, sizeof(int), 1, fp); - - for (int i = 0; i < m_iGraphSize; i++) - { - fwrite((m_pNeighborhoodGraph)[i], sizeof(int), m_iNeighborhoodSize, fp); - } - fclose(fp); - std::cout << "Save Graph Finish!" 
<< std::endl; - return true; - } - - template - void Index::PartitionByTptree(std::vector& indices, - const int first, - const int last, - std::vector> & leaves) - { - if (last - first <= m_iTPTLeafSize) - { - leaves.push_back(std::make_pair(first, last)); - } - else - { - std::vector Mean(m_iDataDimension, 0); - - int iIteration = 100; - int end = min(first + m_numSamplesTPTSplitConsideration, last); - int count = end - first + 1; - // calculate the mean of each dimension - for (int j = first; j <= end; j++) - { - T* v = (m_pSamples)[indices[j]]; - for (int k = 0; k < m_iDataDimension; k++) - { - Mean[k] += v[k]; - } - } - for (int k = 0; k < m_iDataDimension; k++) - { - Mean[k] /= count; - } - std::vector Variance; - Variance.reserve(m_iDataDimension); - for (int j = 0; j < m_iDataDimension; j++) - { - Variance.push_back(BasicResult(j, 0)); - } - // calculate the variance of each dimension - for (int j = first; j <= end; j++) - { - T* v = (m_pSamples)[indices[j]]; - for (int k = 0; k < m_iDataDimension; k++) - { - float dist = v[k] - Mean[k]; - Variance[k].Dist += dist*dist; - } - } - std::sort(Variance.begin(), Variance.end(),COMMON::Compare); - std::vector index(m_numTopDimensionTPTSplit); - std::vector weight(m_numTopDimensionTPTSplit), bestweight(m_numTopDimensionTPTSplit); - float bestvariance = Variance[m_iDataDimension - 1].Dist; - for (int i = 0; i < m_numTopDimensionTPTSplit; i++) - { - index[i] = Variance[m_iDataDimension - 1 - i].VID; - bestweight[i] = 0; - } - bestweight[0] = 1; - float bestmean = Mean[index[0]]; - - std::vector Val(count); - for (int i = 0; i < iIteration; i++) - { - float sumweight = 0; - for (int j = 0; j < m_numTopDimensionTPTSplit; j++) - { - weight[j] = float(rand() % 10000) / 5000.0f - 1.0f; - sumweight += weight[j] * weight[j]; - } - sumweight = sqrt(sumweight); - for (int j = 0; j < m_numTopDimensionTPTSplit; j++) - { - weight[j] /= sumweight; - } - float mean = 0; - for (int j = 0; j < count; j++) - { - Val[j] = 0; - for 
(int k = 0; k < m_numTopDimensionTPTSplit; k++) - { - Val[j] += weight[k] * (m_pSamples)[indices[first + j]][index[k]]; - } - mean += Val[j]; - } - mean /= count; - float var = 0; - for (int j = 0; j < count; j++) - { - float dist = Val[j] - mean; - var += dist * dist; - } - if (var > bestvariance) - { - bestvariance = var; - bestmean = mean; - for (int j = 0; j < m_numTopDimensionTPTSplit; j++) - { - bestweight[j] = weight[j]; - } - } - } - int i = first; - int j = last; - // decide which child one point belongs - while (i <= j) - { - float val = 0; - for (int k = 0; k < m_numTopDimensionTPTSplit; k++) - { - val += bestweight[k] * (m_pSamples)[indices[i]][index[k]]; - } - if (val < bestmean) - { - i++; - } - else - { - std::swap(indices[i], indices[j]); - j--; - } - } - // if all the points in the node are equal,equally split the node into 2 - if ((i == first) || (i == last + 1)) - { - i = (first + last + 1) / 2; - } - - Mean.clear(); - Variance.clear(); - Val.clear(); - index.clear(); - weight.clear(); - bestweight.clear(); - - PartitionByTptree(indices, first, i - 1, leaves); - PartitionByTptree(indices, i, last, leaves); - } - } - - template - void Index::RefineRNG() { - std::vector spaces(m_iNumberOfThreads); - for (int i = 0; i < m_iNumberOfThreads; i++) spaces[i].Initialize(m_iMaxCheckForRefineGraph, m_iGraphSize); - -#pragma omp parallel for schedule(dynamic) - for (int i = 0; i < m_iGraphSize; i++) - { - RefineRNGNode(i, spaces[omp_get_thread_num()], false); - } - } - - template - void Index::BuildRNG() - { - std::cout << "build RNG graph!" 
<< std::endl; - omp_set_num_threads(m_iNumberOfThreads); - - int graphScale = 16; - int cefScale = 4; - m_iNeighborhoodSize *= graphScale; - m_iGraphSize = m_iDataSize; - - m_pNeighborhoodGraph.Initialize(m_iGraphSize, m_iNeighborhoodSize); - if (m_iGraphSize < 1000) { - std::memset(m_pNeighborhoodGraph.GetData(), -1, m_iGraphSize * m_iNeighborhoodSize * sizeof(int)); - m_iNeighborhoodSize /= graphScale; - - COMMON::WorkSpace space; - space.Initialize(m_iMaxCheckForRefineGraph, m_iGraphSize); - for (int i = 0; i < m_iGraphSize; i++) - { - RefineRNGNode(i, space, true); - } - std::cout << "Build RNG Graph end!" << std::endl; - return; - } - - { - COMMON::Dataset NeighborhoodDists(m_iGraphSize, m_iNeighborhoodSize); - std::vector> TptreeDataIndices(m_iTPTNumber, std::vector(m_iGraphSize)); - std::vector>> TptreeLeafNodes(m_iTPTNumber, std::vector>()); - for (int i = 0; i < m_iGraphSize; i++) - { - for (int j = 0; j < m_iNeighborhoodSize; j++) - { - (m_pNeighborhoodGraph)[i][j] = -1; - (NeighborhoodDists)[i][j] = MaxDist; - } - TptreeDataIndices[0][i] = i; - } - for (int i = 1; i < m_iTPTNumber; i++) { - std::memcpy(TptreeDataIndices[i].data(), TptreeDataIndices[0].data(), sizeof(int) * m_iGraphSize); - } - - std::cout << "Parallel TpTree Partition begin " << std::endl; -#pragma omp parallel for schedule(dynamic) - for (int i = 0; i < m_iTPTNumber; i++) - { - Sleep(i * 100); std::srand(clock()); - std::random_shuffle(TptreeDataIndices[i].begin(), TptreeDataIndices[i].end()); - PartitionByTptree(TptreeDataIndices[i], 0, m_iGraphSize - 1, TptreeLeafNodes[i]); - std::cout << "Finish Getting Leaves for Tree " << i << std::endl; - } - std::cout << "Parallel TpTree Partition done" << std::endl; - - for (int i = 0; i < m_iTPTNumber; i++) - { -#pragma omp parallel for schedule(dynamic) - for (int j = 0; j < TptreeLeafNodes[i].size(); j++) - { - int start_index = TptreeLeafNodes[i][j].first; - int end_index = TptreeLeafNodes[i][j].second; - if (omp_get_thread_num() == 0) 
std::cout << "\rProcessing Tree " << i << ' ' << j * 100 / TptreeLeafNodes[i].size() << '%'; - for (int x = start_index; x < end_index; x++) - { - for (int y = x + 1; y <= end_index; y++) - { - int p1 = TptreeDataIndices[i][x]; - int p2 = TptreeDataIndices[i][y]; - float dist = m_fComputeDistance((m_pSamples)[p1], (m_pSamples)[p2], m_iDataDimension); - COMMON::Utils::AddNeighbor(p2, dist, (m_pNeighborhoodGraph)[p1], (NeighborhoodDists)[p1], m_iNeighborhoodSize); - COMMON::Utils::AddNeighbor(p1, dist, (m_pNeighborhoodGraph)[p2], (NeighborhoodDists)[p2], m_iNeighborhoodSize); } - } - } - TptreeDataIndices[i].clear(); - TptreeLeafNodes[i].clear(); - std::cout << std::endl; - } - TptreeDataIndices.clear(); - TptreeLeafNodes.clear(); - } - std::cout << "NNG acc:" << GraphAccuracyEstimation(100, false) << std::endl; - if (m_iMaxCheckForRefineGraph > 0) { - m_iCEF *= cefScale; - m_iMaxCheckForRefineGraph *= cefScale; - RefineRNG(); - std::cout << "Refine RNG, graph acc:" << GraphAccuracyEstimation(100, true) << std::endl; - - m_iCEF /= cefScale; - m_iMaxCheckForRefineGraph /= cefScale; - m_iNeighborhoodSize /= graphScale; - - //RefineRNG(); - //std::cout << "Refine RNG, graph acc:" << GraphAccuracyEstimation(100, true) << std::endl; - RefineRNG(); - std::cout << "Refine RNG, graph acc:" << GraphAccuracyEstimation(100, true) << std::endl; - } - std::cout << "Build RNG Graph end!" 
<< std::endl; - } - - template - float Index::GraphAccuracyEstimation(int NSample, bool rng) { - int* correct = new int[NSample]; - -#pragma omp parallel for schedule(dynamic) - for (int i = 0; i < NSample; i++) - { - int x = COMMON::Utils::rand_int(m_iGraphSize); - //int x = i; - COMMON::QueryResultSet query((m_pSamples)[x], m_iCEF); - for (int y = 0; y < m_iGraphSize; y++) - { - if (y == x) continue; - float dist = m_fComputeDistance(query.GetTarget(), (m_pSamples)[y], m_iDataDimension); - query.AddPoint(y, dist); - } - query.SortResult(); - int * exact_rng = new int[m_iNeighborhoodSize]; - if (rng) { - RebuildRNGNodeNeighbors(exact_rng, query.GetResults(), m_iCEF); - } - else { - for (int j = 0; j < m_iNeighborhoodSize && j < m_iCEF; j++) { - exact_rng[j] = query.GetResult(j)->VID; - } - for (int j = m_iCEF; j < m_iNeighborhoodSize; j++) exact_rng[j] = -1; - } - correct[i] = 0; - for (int j = 0; j < m_iNeighborhoodSize; j++) { - if (exact_rng[j] == -1) { - correct[i] += m_iNeighborhoodSize - j; - break; - } - for (int k = 0; k < m_iNeighborhoodSize; k++) - if ((m_pNeighborhoodGraph)[x][k] == exact_rng[j]) { - correct[i]++; - break; - } - } - delete[] exact_rng; - } - float acc = 0; - for (int i = 0; i < NSample; i++) acc += float(correct[i]); - acc = acc / NSample / m_iNeighborhoodSize; - delete[] correct; - return acc; + m_pTrees.BuildTrees(this); + m_pGraph.BuildGraph(this); + + return ErrorCode::Success; } template @@ -882,138 +152,43 @@ namespace SPTAG { mkdir(folderPath.c_str()); } - tbb::concurrent_unordered_set deleted(m_deletedID.begin(), m_deletedID.end()); + + std::lock_guard lock(m_dataLock); + int newR = GetNumSamples(); + std::vector indices; - std::unordered_map old2new; - int newR = m_iDataSize; + std::vector reverseIndices(newR); for (int i = 0; i < newR; i++) { - if (deleted.find(i) == deleted.end()) { + if (m_deletedID.find(i) == m_deletedID.end()) { indices.push_back(i); - old2new[i] = i; + reverseIndices[i] = i; } else { - while 
(deleted.find(newR - 1) != deleted.end() && newR > i) newR--; + while (m_deletedID.find(newR - 1) != m_deletedID.end() && newR > i) newR--; if (newR == i) break; indices.push_back(newR - 1); - old2new[newR - 1] = i; + reverseIndices[newR - 1] = i; newR--; } } - old2new[-1] = -1; - std::cout << "Refine... from " << m_iDataSize << "->" << newR << std::endl; - std::ofstream vecOut(folderPath + m_sDataPointsFilename, std::ios::binary); - if (!vecOut.is_open()) return ErrorCode::FailedCreateFile; - vecOut.write((char*)&newR, sizeof(int)); - vecOut.write((char*)&m_iDataDimension, sizeof(int)); - for (int i = 0; i < newR; i++) { - vecOut.write((char*)(m_pSamples[indices[i]]), sizeof(T)*m_iDataDimension); - } - vecOut.close(); + std::cout << "Refine... from " << GetNumSamples() << "->" << newR << std::endl; - if (nullptr != m_pMetadata) - { - std::ofstream metaOut(folderPath + "metadata.bin_tmp", std::ios::binary); - std::ofstream metaIndexOut(folderPath + "metadataIndex.bin", std::ios::binary); - if (!metaOut.is_open() || !metaIndexOut.is_open()) return ErrorCode::FailedCreateFile; - metaIndexOut.write((char*)&newR, sizeof(int)); - std::uint64_t offset = 0; - for (int i = 0; i < newR; i++) { - metaIndexOut.write((char*)&offset, sizeof(std::uint64_t)); - ByteArray meta = m_pMetadata->GetMetadata(indices[i]); - metaOut.write((char*)meta.Data(), sizeof(uint8_t)*meta.Length()); - offset += meta.Length(); - } - metaOut.close(); - metaIndexOut.write((char*)&offset, sizeof(std::uint64_t)); - metaIndexOut.close(); - - SPTAG::MetadataSet::MetaCopy(folderPath + "metadata.bin_tmp", folderPath + "metadata.bin"); - } + if (false == m_pSamples.Refine(indices, folderPath + m_sDataPointsFilename)) return ErrorCode::FailedCreateFile; + if (nullptr != m_pMetadata && ErrorCode::Success != m_pMetadata->RefineMetadata(indices, folderPath)) return ErrorCode::FailedCreateFile; - std::ofstream graphOut(folderPath + m_sGraphFilename, std::ios::binary); - if (!graphOut.is_open()) return 
ErrorCode::FailedCreateFile; - graphOut.write((char*)&newR, sizeof(int)); - graphOut.write((char*)&m_iNeighborhoodSize, sizeof(int)); - - int *neighbors = new int[m_iNeighborhoodSize]; - COMMON::WorkSpace space; - space.Initialize(m_iMaxCheckForRefineGraph, m_iDataSize); - for (int i = 0; i < newR; i++) { - space.Reset(m_iMaxCheckForRefineGraph); - COMMON::QueryResultSet query((m_pSamples)[indices[i]], m_iCEF); - space.CheckAndSet(indices[i]); - for (int j = 0; j < m_iNeighborhoodSize; j++) { - int index = m_pNeighborhoodGraph[indices[i]][j]; - if (index < 0 || space.CheckAndSet(index)) continue; - space.m_NGQueue.insert(COMMON::HeapCell(index, m_fComputeDistance(query.GetTarget(), m_pSamples[index], m_iDataDimension))); - } - SearchIndex(query, space, deleted); - RebuildRNGNodeNeighbors(neighbors, query.GetResults(), m_iCEF); - for (int j = 0; j < m_iNeighborhoodSize; j++) - neighbors[j] = old2new[neighbors[j]]; - graphOut.write((char*)neighbors, sizeof(int) * m_iNeighborhoodSize); - } - delete[]neighbors; - graphOut.close(); - - std::vector newRoot; - std::vector newStart; - BuildKDT(indices, newStart, newRoot); - + m_pGraph.RefineGraph(this, indices, reverseIndices, folderPath + m_sGraphFilename); + + COMMON::KDTree newTrees(m_pTrees); + newTrees.BuildTrees(this, &indices); #pragma omp parallel for - for (int i = 0; i < m_iKDTNumber; i++) - { - for (int j = newStart[i]; j < newStart[i+1]; j++) { - if (newRoot[j].left < 0) - newRoot[j].left = -old2new[-newRoot[j].left - 1] - 1; - if (newRoot[j].right < 0) - newRoot[j].right = -old2new[-newRoot[j].right - 1] - 1; - } + for (int i = 0; i < newTrees.size(); i++) { + if (newTrees[i].left < 0) + newTrees[i].left = -reverseIndices[-newTrees[i].left - 1] - 1; + if (newTrees[i].right < 0) + newTrees[i].right = -reverseIndices[-newTrees[i].right - 1] - 1; } - SaveKDT(folderPath + m_sKDTFilename, newStart, newRoot); - return ErrorCode::Success; - } - - template - ErrorCode Index::MergeIndex(const char* p_indexFilePath1, 
const char* p_indexFilePath2) { - std::string folderPath1(p_indexFilePath1), folderPath2(p_indexFilePath2); - if (!folderPath1.empty() && *(folderPath1.rbegin()) != FolderSep) folderPath1 += FolderSep; - if (!folderPath2.empty() && *(folderPath2.rbegin()) != FolderSep) folderPath2 += FolderSep; - - Helper::IniReader p_configReader1, p_configReader2; - if (ErrorCode::Success != p_configReader1.LoadIniFile(folderPath1 + "/indexloader.ini")) - return ErrorCode::FailedOpenFile; - - if (ErrorCode::Success != p_configReader2.LoadIniFile(folderPath2 + "/indexloader.ini")) - return ErrorCode::FailedOpenFile; - - std::string empty(""); - if (!COMMON::DataUtils::MergeIndex(folderPath1 + p_configReader1.GetParameter("Index", "VectorFilePath", empty), - folderPath1 + p_configReader1.GetParameter("MetaData", "MetaDataFilePath", empty), - folderPath1 + p_configReader1.GetParameter("MetaData", "MetaDataIndexPath", empty), - folderPath2 + p_configReader1.GetParameter("Index", "VectorFilePath", empty), - folderPath2 + p_configReader1.GetParameter("MetaData", "MetaDataFilePath", empty), - folderPath2 + p_configReader1.GetParameter("MetaData", "MetaDataIndexPath", empty))) - return ErrorCode::Fail; - -#define DefineKDTParameter(VarName, VarType, DefaultValue, RepresentStr) \ - SetParameter(RepresentStr, \ - p_configReader1.GetParameter("Index", \ - RepresentStr, \ - std::string(#DefaultValue)).c_str()); \ - -#include "inc/Core/KDT/ParameterDefinitionList.h" -#undef DefineKDTParameter - - if (!LoadDataPoints(folderPath1 + p_configReader1.GetParameter("Index", "VectorFilePath", empty))) return ErrorCode::FailedOpenFile; - std::vector indices(m_iDataSize); - for (int j = 0; j < m_iDataSize; j++) indices[j] = j; - BuildKDT(indices, m_pKDTStart, m_pKDTRoots); - BuildRNG(); - - SaveKDT(folderPath1 + p_configReader1.GetParameter("Index", "TreeFilePath", empty), m_pKDTStart, m_pKDTRoots); - SaveRNG(folderPath1 + p_configReader1.GetParameter("Index", "GraphFilePath", empty)); + 
newTrees.SaveTrees(folderPath + m_sKDTFilename); return ErrorCode::Success; } @@ -1022,10 +197,12 @@ namespace SPTAG const T* ptr_v = (const T*)p_vectors; #pragma omp parallel for schedule(dynamic) for (int i = 0; i < p_vectorNum; i++) { - COMMON::QueryResultSet query(ptr_v + i * m_iDataDimension, m_iCEF); + COMMON::QueryResultSet query(ptr_v + i * GetFeatureDim(), m_pGraph.m_iCEF); SearchIndex(query); - for (int i = 0; i < m_iCEF; i++) { + + for (int i = 0; i < m_pGraph.m_iCEF; i++) { if (query.GetResult(i)->Dist < 1e-6) { + std::lock_guard lock(m_dataLock); m_deletedID.insert(query.GetResult(i)->VID); } } @@ -1034,230 +211,79 @@ namespace SPTAG } template - ErrorCode Index::AddIndex(const void* p_vectors, int p_vectorNum, int p_dimension) { - if (m_pKDTRoots.size() == 0) { - return BuildIndex(p_vectors, p_vectorNum, p_dimension); - } - if (p_dimension != m_iDataDimension) return ErrorCode::FailedParseValue; - + ErrorCode Index::AddIndex(const void* p_vectors, int p_vectorNum, int p_dimension) + { int begin, end; { - std::lock_guard lock(m_dataAllocLock); + std::lock_guard lock(m_dataLock); + + if (GetNumSamples() == 0) + return BuildIndex(p_vectors, p_vectorNum, p_dimension); + + if (p_dimension != GetFeatureDim()) + return ErrorCode::FailedParseValue; + + begin = GetNumSamples(); + end = GetNumSamples() + p_vectorNum; m_pSamples.AddBatch((const T*)p_vectors, p_vectorNum); - m_pNeighborhoodGraph.AddBatch(p_vectorNum); + m_pGraph.AddBatch(p_vectorNum); - end = m_iDataSize + p_vectorNum; - if (m_pSamples.R() != end || m_pNeighborhoodGraph.R() != end) { + if (m_pSamples.R() != end || m_pGraph.R() != end) { std::cout << "Memory Error: Cannot alloc space for vectors" << std::endl; - m_pSamples.SetR(m_iDataSize); - m_pNeighborhoodGraph.SetR(m_iDataSize); + m_pSamples.SetR(begin); + m_pGraph.SetR(begin); return ErrorCode::Fail; } - begin = m_iDataSize; - m_iDataSize = end; - m_iGraphSize = end; - m_dataUpdateLock.resize(m_iDataSize); - } - if (DistCalcMethod::Cosine == 
m_iDistCalcMethod) - { - int base = COMMON::Utils::GetBase(); - for (int i = begin; i < end; i++) { - COMMON::Utils::Normalize((T*)m_pSamples[i], m_iDataDimension, base); + if (DistCalcMethod::Cosine == m_iDistCalcMethod) + { + int base = COMMON::Utils::GetBase(); + for (int i = begin; i < end; i++) { + COMMON::Utils::Normalize((T*)m_pSamples[i], GetFeatureDim(), base); + } } } - auto space = m_workSpacePool->Rent(); for (int node = begin; node < end; node++) { - RefineRNGNode(node, *(space.get()), true); + m_pGraph.RefineNode(this, node, true); } - m_workSpacePool->Return(space); std::cout << "Add " << p_vectorNum << " vectors" << std::endl; return ErrorCode::Success; } - template - void Index::RefineRNGNode(const int node, COMMON::WorkSpace &space, bool updateNeighbors) { - space.Reset(m_iMaxCheckForRefineGraph); - COMMON::QueryResultSet query((m_pSamples)[node], m_iCEF); - space.CheckAndSet(node); - for (int i = 0; i < m_iNeighborhoodSize; i++) { - int index = m_pNeighborhoodGraph[node][i]; - if (index < 0 || space.CheckAndSet(index)) continue; - space.m_NGQueue.insert(COMMON::HeapCell(index, m_fComputeDistance(query.GetTarget(), m_pSamples[index], m_iDataDimension))); - } - SearchIndex(query, space, m_deletedID); - RebuildRNGNodeNeighbors(m_pNeighborhoodGraph[node], query.GetResults(), m_iCEF); - - if (updateNeighbors) { - // update neighbors - for (int j = 0; j < m_iCEF; j++) - { - BasicResult* item = query.GetResult(j); - if (item->VID < 0) break; - - int insertID = node; - int* nodes = m_pNeighborhoodGraph[item->VID]; - std::lock_guard lock(m_dataUpdateLock[item->VID]); - for (int k = 0; k < m_iNeighborhoodSize; k++) - { - int tmpNode = nodes[k]; - if (tmpNode < 0) - { - bool good = true; - for (int t = 0; t < k; t++) { - if (m_fComputeDistance((m_pSamples)[insertID], (m_pSamples)[nodes[t]], m_iDataDimension) < item->Dist) { - good = false; - break; - } - } - if (good) { - nodes[k] = insertID; - } - break; - } - float tmpDist = 
m_fComputeDistance(m_pSamples[item->VID], m_pSamples[tmpNode], m_iDataDimension); - if (item->Dist < tmpDist || (item->Dist == tmpDist && insertID < tmpNode)) - { - bool good = true; - for (int t = 0; t < k; t++) { - if (m_fComputeDistance((m_pSamples)[insertID], (m_pSamples)[nodes[t]], m_iDataDimension) < item->Dist) { - good = false; - break; - } - } - if (good) { - nodes[k] = insertID; - insertID = tmpNode; - item->Dist = tmpDist; - } - else { - break; - } - } - } - } - } - } - - template - void Index::RebuildRNGNodeNeighbors(int* nodes, const BasicResult* queryResults, int numResults) { - int count = 0; - for (int j = 0; j < numResults && count < m_iNeighborhoodSize; j++) { - const BasicResult& item = queryResults[j]; - if (item.VID < 0) continue; - - bool good = true; - for (int k = 0; k < count; k++) { - if (m_fComputeDistance((m_pSamples)[nodes[k]], (m_pSamples)[item.VID], m_iDataDimension) <= item.Dist) { - good = false; - break; - } - } - if (good) nodes[count++] = item.VID; - } - for (int j = count; j < m_iNeighborhoodSize; j++) nodes[j] = -1; - } - - template - bool Index::SaveDataPoints(std::string sDataPointsFileName) - { - std::cout << "Save Data Points To " << sDataPointsFileName << std::endl; - - FILE * fp = fopen(sDataPointsFileName.c_str(), "wb"); - if (fp == NULL) return false; - - int R = m_pSamples.R(), C = m_pSamples.C(); - fwrite(&R, sizeof(int), 1, fp); - fwrite(&C, sizeof(int), 1, fp); - - // write point one by one in case for cache miss - for (int i = 0; i < R; i++) { - fwrite((m_pSamples)[i], sizeof(T), C, fp); - } - fclose(fp); - - std::cout << "Save Data Points (" << m_pSamples.R() << ", " << m_pSamples.C() << ") Finish!" 
<< std::endl; - return true; - } - template ErrorCode - Index::SaveIndex(const std::string& p_folderPath) + Index::SaveIndex(const std::string& p_folderPath, std::ofstream& p_configout) { - std::string folderPath(p_folderPath); - if (!folderPath.empty() && *(folderPath.rbegin()) != FolderSep) - { - folderPath += FolderSep; - } - - if (!direxists(folderPath.c_str())) - { - mkdir(folderPath.c_str()); - } - - std::string loaderFilePath = folderPath + "indexloader.ini"; - - std::ofstream loaderFile(loaderFilePath); - if (!loaderFile.is_open()) - { - return ErrorCode::FailedCreateFile; - } - m_sDataPointsFilename = "vectors.bin"; m_sKDTFilename = "tree.bin"; m_sGraphFilename = "graph.bin"; - std::string metadataFile = "metadata.bin"; - std::string metadataIndexFile = "metadataIndex.bin"; - - loaderFile << "[Index]" << std::endl; - loaderFile << "IndexAlgoType=" << Helper::Convert::ConvertToString(IndexAlgoType::KDT) << std::endl; - loaderFile << "ValueType=" << Helper::Convert::ConvertToString(GetEnumValueType()) << std::endl; - loaderFile << std::endl; #define DefineKDTParameter(VarName, VarType, DefaultValue, RepresentStr) \ - loaderFile << RepresentStr << "=" << GetParameter(RepresentStr) << std::endl; + p_configout << RepresentStr << "=" << GetParameter(RepresentStr) << std::endl; #include "inc/Core/KDT/ParameterDefinitionList.h" #undef DefineKDTParameter - loaderFile << std::endl; - - if (nullptr != m_pMetadata) - { - loaderFile << "[MetaData]" << std::endl; - loaderFile << "MetaDataFilePath=" << metadataFile << std::endl; - loaderFile << "MetaDataIndexPath=" << metadataIndexFile << std::endl; - loaderFile << std::endl; - } - loaderFile.close(); + p_configout << std::endl; if (m_deletedID.size() > 0) { - RefineIndex(folderPath); + RefineIndex(p_folderPath); } else { - if (!SaveDataPoints(folderPath + m_sDataPointsFilename)) return ErrorCode::Fail; - if (!SaveKDT(folderPath + m_sKDTFilename, m_pKDTStart, m_pKDTRoots)) return ErrorCode::Fail; - if 
(!SaveRNG(folderPath + m_sGraphFilename)) return ErrorCode::Fail; - if (nullptr != m_pMetadata) - { - m_pMetadata->SaveMetadata(folderPath + metadataFile, folderPath + metadataIndexFile); - } + if (!m_pSamples.Save(p_folderPath + m_sDataPointsFilename)) return ErrorCode::Fail; + if (!m_pTrees.SaveTrees(p_folderPath + m_sKDTFilename)) return ErrorCode::Fail; + if (!m_pGraph.SaveGraph(p_folderPath + m_sGraphFilename)) return ErrorCode::Fail; } return ErrorCode::Success; } -#pragma endregion -#pragma endregion template ErrorCode Index::SetParameter(const char* p_param, const char* p_value) { - if (nullptr == p_param || nullptr == p_value) - { - return ErrorCode::Fail; - } + if (nullptr == p_param || nullptr == p_value) return ErrorCode::Fail; #define DefineKDTParameter(VarName, VarType, DefaultValue, RepresentStr) \ else if (SPTAG::Helper::StrUtils::StrEqualIgnoreCase(p_param, RepresentStr)) \ @@ -1282,10 +308,7 @@ namespace SPTAG std::string Index::GetParameter(const char* p_param) const { - if (nullptr == p_param) - { - return std::string(); - } + if (nullptr == p_param) return std::string(); #define DefineKDTParameter(VarName, VarType, DefaultValue, RepresentStr) \ else if (SPTAG::Helper::StrUtils::StrEqualIgnoreCase(p_param, RepresentStr)) \ diff --git a/AnnService/src/Core/MetadataSet.cpp b/AnnService/src/Core/MetadataSet.cpp index 405698ed..999c5b47 100644 --- a/AnnService/src/Core/MetadataSet.cpp +++ b/AnnService/src/Core/MetadataSet.cpp @@ -5,6 +5,31 @@ using namespace SPTAG; +ErrorCode +MetadataSet::RefineMetadata(std::vector& indices, const std::string& p_folderPath) +{ + std::ofstream metaOut(p_folderPath + "metadata.bin_tmp", std::ios::binary); + std::ofstream metaIndexOut(p_folderPath + "metadataIndex.bin", std::ios::binary); + if (!metaOut.is_open() || !metaIndexOut.is_open()) return ErrorCode::FailedCreateFile; + + int R = (int)indices.size(); + metaIndexOut.write((char*)&R, sizeof(int)); + std::uint64_t offset = 0; + for (int i = 0; i < R; i++) { + 
metaIndexOut.write((char*)&offset, sizeof(std::uint64_t)); + ByteArray meta = GetMetadata(indices[i]); + metaOut.write((char*)meta.Data(), sizeof(uint8_t)*meta.Length()); + offset += meta.Length(); + } + metaOut.close(); + metaIndexOut.write((char*)&offset, sizeof(std::uint64_t)); + metaIndexOut.close(); + + SPTAG::MetadataSet::MetaCopy(p_folderPath + "metadata.bin_tmp", p_folderPath + "metadata.bin"); + return ErrorCode::Success; +} + + ErrorCode MetadataSet::MetaCopy(const std::string& p_src, const std::string& p_dst) { @@ -60,7 +85,7 @@ FileMetadataSet::FileMetadataSet(const std::string& p_metafile, const std::strin return; } - fpidx.read((char *)&m_count, sizeof(int)); + fpidx.read((char *)&m_count, sizeof(m_count)); m_pOffsets.resize(m_count + 1); fpidx.read((char *)m_pOffsets.data(), sizeof(std::uint64_t) * (m_count + 1)); fpidx.close(); @@ -82,7 +107,7 @@ FileMetadataSet::GetMetadata(IndexType p_vectorID) const { std::uint64_t startoff = m_pOffsets[p_vectorID]; std::uint64_t bytes = m_pOffsets[p_vectorID + 1] - startoff; - if (p_vectorID < m_count) { + if (p_vectorID < (IndexType)m_count) { m_fp->seekg(startoff, std::ios_base::beg); ByteArray b = ByteArray::Alloc((SizeType)bytes); m_fp->read((char*)b.Data(), bytes); @@ -142,7 +167,7 @@ FileMetadataSet::SaveMetadata(const std::string& p_metaFile, const std::string& std::ofstream dst(p_metaindexFile, std::ios::binary); m_count = static_cast(m_pOffsets.size()) - 1; m_newdata.clear(); - dst.write((char*)&m_count, sizeof(int)); + dst.write((char*)&m_count, sizeof(m_count)); dst.write((char*)m_pOffsets.data(), sizeof(std::uint64_t) * m_pOffsets.size()); return ret; } diff --git a/AnnService/src/Core/VectorIndex.cpp b/AnnService/src/Core/VectorIndex.cpp index 341d30c7..af6cf601 100644 --- a/AnnService/src/Core/VectorIndex.cpp +++ b/AnnService/src/Core/VectorIndex.cpp @@ -1,4 +1,5 @@ #include "inc/Core/VectorIndex.h" +#include "inc/Core/Common/DataUtils.h" #include "inc/Helper/CommonHelper.h" #include 
"inc/Helper/StringConvert.h" #include "inc/Helper/SimpleIniReader.h" @@ -21,20 +22,6 @@ VectorIndex::~VectorIndex() } -void -VectorIndex::SetIndexName(const std::string& p_indexName) -{ - m_indexName = p_indexName; -} - - -const std::string& -VectorIndex::GetIndexName() const -{ - return m_indexName; -} - - std::string VectorIndex::GetParameter(const std::string& p_param) const { @@ -65,6 +52,92 @@ VectorIndex::GetMetadata(IndexType p_vectorID) const { } +ErrorCode +VectorIndex::LoadIndex(const std::string& p_folderPath) +{ + std::string folderPath(p_folderPath); + if (!folderPath.empty() && *(folderPath.rbegin()) != FolderSep) + { + folderPath += FolderSep; + } + + Helper::IniReader p_configReader; + if (ErrorCode::Success != p_configReader.LoadIniFile(folderPath + "/indexloader.ini")) + { + return ErrorCode::FailedOpenFile; + } + + std::string metadataSection("MetaData"); + if (p_configReader.DoesSectionExist(metadataSection)) + { + std::string metadataFilePath = p_configReader.GetParameter(metadataSection, + "MetaDataFilePath", + std::string()); + std::string metadataIndexFilePath = p_configReader.GetParameter(metadataSection, + "MetaDataIndexPath", + std::string()); + + m_pMetadata.reset(new FileMetadataSet(folderPath + metadataFilePath, folderPath + metadataIndexFilePath)); + + if (!m_pMetadata->Available()) + { + std::cerr << "Error: Failed to load metadata." << std::endl; + return ErrorCode::Fail; + } + } + if (DistCalcMethod::Undefined == p_configReader.GetParameter("Index", "DistCalcMethod", DistCalcMethod::Undefined)) + { + std::cerr << "Error: Failed to load parameter DistCalcMethod." 
<< std::endl; + return ErrorCode::Fail; + } + + return LoadIndex(folderPath, p_configReader); +} + + +ErrorCode VectorIndex::SaveIndex(const std::string& p_folderPath) +{ + std::string folderPath(p_folderPath); + if (!folderPath.empty() && *(folderPath.rbegin()) != FolderSep) + { + folderPath += FolderSep; + } + + if (!direxists(folderPath.c_str())) + { + mkdir(folderPath.c_str()); + } + + std::string loaderFilePath = folderPath + "indexloader.ini"; + + std::ofstream loaderFile(loaderFilePath); + if (!loaderFile.is_open()) + { + return ErrorCode::FailedCreateFile; + } + + if (nullptr != m_pMetadata) + { + std::string metadataFile = "metadata.bin"; + std::string metadataIndexFile = "metadataIndex.bin"; + loaderFile << "[MetaData]" << std::endl; + loaderFile << "MetaDataFilePath=" << metadataFile << std::endl; + loaderFile << "MetaDataIndexPath=" << metadataIndexFile << std::endl; + loaderFile << std::endl; + + m_pMetadata->SaveMetadata(folderPath + metadataFile, folderPath + metadataIndexFile); + } + + loaderFile << "[Index]" << std::endl; + loaderFile << "IndexAlgoType=" << Helper::Convert::ConvertToString(GetIndexAlgoType()) << std::endl; + loaderFile << "ValueType=" << Helper::Convert::ConvertToString(GetVectorValueType()) << std::endl; + loaderFile << std::endl; + + ErrorCode ret = SaveIndex(folderPath, loaderFile); + loaderFile.close(); + return ret; +} + ErrorCode VectorIndex::BuildIndex(std::shared_ptr p_vectorSet, std::shared_ptr p_metadataSet) @@ -192,3 +265,55 @@ VectorIndex::LoadIndex(const std::string& p_loaderFilePath, std::shared_ptr index = CreateInstance( + p_configReader1.GetParameter("Index", "IndexAlgoType", IndexAlgoType::Undefined), + p_configReader1.GetParameter("Index", "ValueType", VectorValueType::Undefined)); + if (index == nullptr) return ErrorCode::FailedParseValue; + + std::string empty(""); + if (!COMMON::DataUtils::MergeIndex(folderPath1 + p_configReader1.GetParameter("Index", "VectorFilePath", empty), + folderPath1 + 
p_configReader1.GetParameter("MetaData", "MetaDataFilePath", empty), + folderPath1 + p_configReader1.GetParameter("MetaData", "MetaDataIndexPath", empty), + folderPath2 + p_configReader1.GetParameter("Index", "VectorFilePath", empty), + folderPath2 + p_configReader1.GetParameter("MetaData", "MetaDataFilePath", empty), + folderPath2 + p_configReader1.GetParameter("MetaData", "MetaDataIndexPath", empty))) + return ErrorCode::Fail; + + for (const auto& iter : p_configReader1.GetParameters("Index")) + index->SetParameter(iter.first.c_str(), iter.second.c_str()); + + if (p_configReader1.DoesSectionExist("MetaData")) + { + for (const auto& iter : p_configReader1.GetParameters("MetaData")) + index->SetParameter(iter.first.c_str(), iter.second.c_str()); + index->SetMetadata(folderPath1 + p_configReader1.GetParameter("MetaData", "MetaDataFilePath", empty), + folderPath1 + p_configReader1.GetParameter("MetaData", "MetaDataIndexPath", empty)); + } + + std::ifstream vecIn(folderPath1 + p_configReader1.GetParameter("Index", "VectorFilePath", empty), std::ios::binary); + int R, C; + vecIn.read((char*)&R, sizeof(int)); + vecIn.read((char*)&C, sizeof(int)); + size_t size = R * C * GetValueTypeSize(index->GetVectorValueType()); + char* data = new char[size]; + vecIn.read(data, size); + vecIn.close(); + index->BuildIndex((void*)data, R, C); + index->SaveIndex(folderPath1); + return ErrorCode::Success; +} \ No newline at end of file diff --git a/AnnService/src/IndexBuilder/main.cpp b/AnnService/src/IndexBuilder/main.cpp index d370409d..c265d758 100644 --- a/AnnService/src/IndexBuilder/main.cpp +++ b/AnnService/src/IndexBuilder/main.cpp @@ -35,7 +35,7 @@ int main(int argc, char* argv[]) std::string paramName = param.substr(0, idx); std::string paramVal = param.substr(idx + 1); std::string sectionName; - idx = paramName.find("."); + idx = (int)paramName.find("."); if (idx >= 0) { sectionName = paramName.substr(0, idx); paramName = paramName.substr(idx + 1); diff --git 
a/PythonWrapper/PythonCore.vcxproj b/PythonWrapper/PythonCore.vcxproj index 4e23ba73..f614d325 100644 --- a/PythonWrapper/PythonCore.vcxproj +++ b/PythonWrapper/PythonCore.vcxproj @@ -113,7 +113,8 @@ - + + @@ -128,6 +129,7 @@ - + + \ No newline at end of file diff --git a/PythonWrapper/inc/CoreInterface.h b/PythonWrapper/inc/CoreInterface.h index 2a37e3c9..903cfe05 100644 --- a/PythonWrapper/inc/CoreInterface.h +++ b/PythonWrapper/inc/CoreInterface.h @@ -59,8 +59,6 @@ class AnnIndex bool Delete(ByteArray p_data, SizeType p_num); - bool Refine(const char* p_loaderFile); - static AnnIndex Load(const char* p_loaderFile); private: diff --git a/PythonWrapper/packages.config b/PythonWrapper/packages.config index d780ec4a..667ef75b 100644 --- a/PythonWrapper/packages.config +++ b/PythonWrapper/packages.config @@ -3,4 +3,6 @@ + + \ No newline at end of file diff --git a/PythonWrapper/src/CoreInterface.cpp b/PythonWrapper/src/CoreInterface.cpp index 45dc6504..fa14e415 100644 --- a/PythonWrapper/src/CoreInterface.cpp +++ b/PythonWrapper/src/CoreInterface.cpp @@ -208,9 +208,3 @@ AnnIndex::Delete(ByteArray p_data, SizeType p_num) } return false; } - -bool -AnnIndex::Refine(const char* p_loaderFile) -{ - return (SPTAG::ErrorCode::Success == m_index->RefineIndex(std::string(p_loaderFile))); -} \ No newline at end of file From 9021bcc011eda9bbf084f1c05cbfb4befa965bee Mon Sep 17 00:00:00 2001 From: MaggieQi Date: Fri, 11 Jan 2019 14:56:56 +0800 Subject: [PATCH 02/14] Update Readme.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 331ddae8..c066dd29 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # SPTAG: A library for fast approximate nearest neighbor search -[![MIT licensed](https://img.shields.io/badge/license-MIT-yellow.svg)](https://github.com/Microsoft/nni/blob/master/LICENSE) -[![Build 
status](https://sysdnn.visualstudio.com/SPTAG/_apis/build/status/SPTAG-CI)](https://sysdnn.visualstudio.com/SPTAG/_build/latest?definitionId=2) +[![MIT licensed](https://img.shields.io/badge/license-MIT-yellow.svg)](https://github.com/Microsoft/SPTAG/blob/master/LICENSE) +[![Build status](https://sysdnn.visualstudio.com/SPTAG/_apis/build/status/SPTAG-GITHUB)](https://sysdnn.visualstudio.com/SPTAG/_build/latest?definitionId=2) ## **SPTAG** SPTAG (Space Partition Tree And Graph) is a library for large scale vector approximate nearest neighbor search scenerio, which is written in C++ and wrapped by Python. From b685101f00c98ce79a4f7fd9d37567cc50f04b6a Mon Sep 17 00:00:00 2001 From: MaggieQi Date: Fri, 11 Jan 2019 15:14:32 +0800 Subject: [PATCH 03/14] Fix DataSet GNU compile fail bug --- AnnService/inc/Core/Common/Dataset.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/AnnService/inc/Core/Common/Dataset.h b/AnnService/inc/Core/Common/Dataset.h index 97b399fc..a298eb0f 100644 --- a/AnnService/inc/Core/Common/Dataset.h +++ b/AnnService/inc/Core/Common/Dataset.h @@ -3,6 +3,12 @@ #include +#if defined(__INTEL_COMPILER) +#include +#else +#include +#endif // defined(__GNUC__) + #define ALIGN 32 #define aligned_malloc(a, b) _mm_malloc(a, b) From a7a61a1d47c640712b818a506b242e17c01bab21 Mon Sep 17 00:00:00 2001 From: MaggieQi Date: Fri, 11 Jan 2019 15:35:19 +0800 Subject: [PATCH 04/14] fix GNU Windows align alloc bugs --- AnnService/inc/Core/Common/Dataset.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AnnService/inc/Core/Common/Dataset.h b/AnnService/inc/Core/Common/Dataset.h index a298eb0f..fd7817f8 100644 --- a/AnnService/inc/Core/Common/Dataset.h +++ b/AnnService/inc/Core/Common/Dataset.h @@ -3,7 +3,7 @@ #include -#if defined(__INTEL_COMPILER) +#if defined(_MSC_VER) || defined(__INTEL_COMPILER) #include #else #include From b821b44893015b7e08fdd59f1972f2e4e82a21b1 Mon Sep 17 00:00:00 2001 From: cheqi Date: Thu, 17 Jan 2019 10:53:55 +0800 
Subject: [PATCH 05/14] add copyright in each file --- AnnService/CMakeLists.txt | 3 +++ AnnService/inc/Aggregator/AggregatorContext.h | 3 +++ AnnService/inc/Aggregator/AggregatorExecutionContext.h | 3 +++ AnnService/inc/Aggregator/AggregatorService.h | 3 +++ AnnService/inc/Aggregator/AggregatorSettings.h | 3 +++ AnnService/inc/Client/ClientWrapper.h | 3 +++ AnnService/inc/Client/Options.h | 3 +++ AnnService/inc/Core/BKT/Index.h | 3 +++ AnnService/inc/Core/BKT/ParameterDefinitionList.h | 3 +++ AnnService/inc/Core/Common.h | 3 +++ AnnService/inc/Core/Common/BKTree.h | 3 +++ AnnService/inc/Core/Common/CommonUtils.h | 3 +++ AnnService/inc/Core/Common/DataUtils.h | 3 +++ AnnService/inc/Core/Common/Dataset.h | 3 +++ AnnService/inc/Core/Common/DistanceUtils.h | 3 +++ AnnService/inc/Core/Common/FineGrainedLock.h | 3 +++ AnnService/inc/Core/Common/Heap.h | 3 +++ AnnService/inc/Core/Common/KDTree.h | 3 +++ AnnService/inc/Core/Common/NeighborhoodGraph.h | 3 +++ AnnService/inc/Core/Common/QueryResultSet.h | 3 +++ AnnService/inc/Core/Common/RelativeNeighborhoodGraph.h | 3 +++ AnnService/inc/Core/Common/WorkSpace.h | 3 +++ AnnService/inc/Core/Common/WorkSpacePool.h | 3 +++ AnnService/inc/Core/CommonDataStructure.h | 3 +++ AnnService/inc/Core/DefinitionList.h | 3 +++ AnnService/inc/Core/KDT/Index.h | 3 +++ AnnService/inc/Core/KDT/ParameterDefinitionList.h | 3 +++ AnnService/inc/Core/MetadataSet.h | 3 +++ AnnService/inc/Core/SearchQuery.h | 3 +++ AnnService/inc/Core/VectorIndex.h | 3 +++ AnnService/inc/Core/VectorSet.h | 3 +++ AnnService/inc/Helper/ArgumentsParser.h | 3 +++ AnnService/inc/Helper/Base64Encode.h | 3 +++ AnnService/inc/Helper/CommonHelper.h | 3 +++ AnnService/inc/Helper/Concurrent.h | 3 +++ AnnService/inc/Helper/SimpleIniReader.h | 3 +++ AnnService/inc/Helper/StringConvert.h | 3 +++ AnnService/inc/IndexBuilder/Options.h | 3 +++ AnnService/inc/IndexBuilder/ThreadPool.h | 3 +++ AnnService/inc/IndexBuilder/VectorSetReader.h | 3 +++ 
AnnService/inc/IndexBuilder/VectorSetReaders/DefaultReader.h | 3 +++ AnnService/inc/Server/QueryParser.h | 3 +++ AnnService/inc/Server/SearchExecutionContext.h | 3 +++ AnnService/inc/Server/SearchExecutor.h | 3 +++ AnnService/inc/Server/SearchService.h | 3 +++ AnnService/inc/Server/ServiceContext.h | 3 +++ AnnService/inc/Server/ServiceSettings.h | 3 +++ AnnService/inc/Socket/Client.h | 3 +++ AnnService/inc/Socket/Common.h | 3 +++ AnnService/inc/Socket/Connection.h | 3 +++ AnnService/inc/Socket/ConnectionManager.h | 3 +++ AnnService/inc/Socket/Packet.h | 3 +++ AnnService/inc/Socket/RemoteSearchQuery.h | 3 +++ AnnService/inc/Socket/ResourceManager.h | 3 +++ AnnService/inc/Socket/Server.h | 3 +++ AnnService/inc/Socket/SimpleSerialization.h | 3 +++ AnnService/src/Aggregator/AggregatorContext.cpp | 3 +++ AnnService/src/Aggregator/AggregatorExecutionContext.cpp | 3 +++ AnnService/src/Aggregator/AggregatorService.cpp | 4 +++- AnnService/src/Aggregator/AggregatorSettings.cpp | 3 +++ AnnService/src/Aggregator/main.cpp | 3 +++ AnnService/src/Client/ClientWrapper.cpp | 3 +++ AnnService/src/Client/Options.cpp | 3 +++ AnnService/src/Client/main.cpp | 3 +++ AnnService/src/Core/BKT/BKTIndex.cpp | 3 +++ AnnService/src/Core/Common/NeighborhoodGraph.cpp | 3 +++ AnnService/src/Core/Common/WorkSpacePool.cpp | 3 +++ AnnService/src/Core/CommonDataStructure.cpp | 3 +++ AnnService/src/Core/KDT/KDTIndex.cpp | 3 +++ AnnService/src/Core/MetadataSet.cpp | 3 +++ AnnService/src/Core/VectorIndex.cpp | 3 +++ AnnService/src/Core/VectorSet.cpp | 3 +++ AnnService/src/Helper/ArgumentsParser.cpp | 3 +++ AnnService/src/Helper/Base64Encode.cpp | 3 +++ AnnService/src/Helper/CommonHelper.cpp | 3 +++ AnnService/src/Helper/Concurrent.cpp | 3 +++ AnnService/src/Helper/SimpleIniReader.cpp | 3 +++ AnnService/src/IndexBuilder/Options.cpp | 3 +++ AnnService/src/IndexBuilder/ThreadPool.cpp | 3 +++ AnnService/src/IndexBuilder/VectorSetReader.cpp | 3 +++ .../src/IndexBuilder/VectorSetReaders/DefaultReader.cpp | 3 
+++ AnnService/src/IndexBuilder/main.cpp | 3 +++ AnnService/src/Server/QueryParser.cpp | 3 +++ AnnService/src/Server/SearchExecutionContext.cpp | 3 +++ AnnService/src/Server/SearchExecutor.cpp | 3 +++ AnnService/src/Server/SearchService.cpp | 3 +++ AnnService/src/Server/ServiceContext.cpp | 3 +++ AnnService/src/Server/ServiceSettings.cpp | 3 +++ AnnService/src/Server/main.cpp | 3 +++ AnnService/src/Socket/Client.cpp | 3 +++ AnnService/src/Socket/Common.cpp | 3 +++ AnnService/src/Socket/Connection.cpp | 3 +++ AnnService/src/Socket/ConnectionManager.cpp | 3 +++ AnnService/src/Socket/Packet.cpp | 3 +++ AnnService/src/Socket/RemoteSearchQuery.cpp | 3 +++ AnnService/src/Socket/Server.cpp | 3 +++ CMakeLists.txt | 5 ++++- PythonWrapper/CMakeLists.txt | 3 +++ PythonWrapper/inc/ClientInterface.h | 3 +++ PythonWrapper/inc/CoreInterface.h | 3 +++ PythonWrapper/inc/TransferDataType.h | 3 +++ PythonWrapper/src/ClientInterface.cpp | 3 +++ PythonWrapper/src/CoreInterface.cpp | 3 +++ Search/CMakeLists.txt | 3 +++ Search/main.cpp | 3 +++ Test/CMakeLists.txt | 3 +++ Test/inc/Test.h | 3 +++ Test/src/AlgoTest.cpp | 3 +++ Test/src/Base64HelperTest.cpp | 3 +++ Test/src/CommonHelperTest.cpp | 3 +++ Test/src/DistanceTest.cpp | 3 +++ Test/src/IniReaderTest.cpp | 3 +++ Test/src/StringConvertTest.cpp | 3 +++ Test/src/main.cpp | 3 +++ 114 files changed, 343 insertions(+), 2 deletions(-) diff --git a/AnnService/CMakeLists.txt b/AnnService/CMakeLists.txt index f5102115..3c65409e 100644 --- a/AnnService/CMakeLists.txt +++ b/AnnService/CMakeLists.txt @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ file(GLOB HDR_FILES ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/Common/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/BKT/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/KDT/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Helper/*.h) file(GLOB SRC_FILES ${PROJECT_SOURCE_DIR}/AnnService/src/Core/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/Common/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/BKT/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/KDT/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Helper/*.cpp) diff --git a/AnnService/inc/Aggregator/AggregatorContext.h b/AnnService/inc/Aggregator/AggregatorContext.h index a4df9567..97ccc197 100644 --- a/AnnService/inc/Aggregator/AggregatorContext.h +++ b/AnnService/inc/Aggregator/AggregatorContext.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_AGGREGATOR_AGGREGATORCONTEXT_H_ #define _SPTAG_AGGREGATOR_AGGREGATORCONTEXT_H_ diff --git a/AnnService/inc/Aggregator/AggregatorExecutionContext.h b/AnnService/inc/Aggregator/AggregatorExecutionContext.h index cc34b847..12948a21 100644 --- a/AnnService/inc/Aggregator/AggregatorExecutionContext.h +++ b/AnnService/inc/Aggregator/AggregatorExecutionContext.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_AGGREGATOR_AGGREGATOREXECUTIONCONTEXT_H_ #define _SPTAG_AGGREGATOR_AGGREGATOREXECUTIONCONTEXT_H_ diff --git a/AnnService/inc/Aggregator/AggregatorService.h b/AnnService/inc/Aggregator/AggregatorService.h index 7984adfd..4d864aa5 100644 --- a/AnnService/inc/Aggregator/AggregatorService.h +++ b/AnnService/inc/Aggregator/AggregatorService.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ #ifndef _SPTAG_AGGREGATOR_AGGREGATORSERVICE_H_ #define _SPTAG_AGGREGATOR_AGGREGATORSERVICE_H_ diff --git a/AnnService/inc/Aggregator/AggregatorSettings.h b/AnnService/inc/Aggregator/AggregatorSettings.h index 56ec4e2f..cb1e9fe7 100644 --- a/AnnService/inc/Aggregator/AggregatorSettings.h +++ b/AnnService/inc/Aggregator/AggregatorSettings.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_AGGREGATOR_AGGREGATORSETTINGS_H_ #define _SPTAG_AGGREGATOR_AGGREGATORSETTINGS_H_ diff --git a/AnnService/inc/Client/ClientWrapper.h b/AnnService/inc/Client/ClientWrapper.h index a94c8614..d96a6706 100644 --- a/AnnService/inc/Client/ClientWrapper.h +++ b/AnnService/inc/Client/ClientWrapper.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_CLIENT_CLIENTWRAPPER_H_ #define _SPTAG_CLIENT_CLIENTWRAPPER_H_ diff --git a/AnnService/inc/Client/Options.h b/AnnService/inc/Client/Options.h index a52dc676..062061f0 100644 --- a/AnnService/inc/Client/Options.h +++ b/AnnService/inc/Client/Options.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_CLIENT_OPTIONS_H_ #define _SPTAG_CLIENT_OPTIONS_H_ diff --git a/AnnService/inc/Core/BKT/Index.h b/AnnService/inc/Core/BKT/Index.h index 2ead4acd..329aacbd 100644 --- a/AnnService/inc/Core/BKT/Index.h +++ b/AnnService/inc/Core/BKT/Index.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ #ifndef _SPTAG_BKT_INDEX_H_ #define _SPTAG_BKT_INDEX_H_ diff --git a/AnnService/inc/Core/BKT/ParameterDefinitionList.h b/AnnService/inc/Core/BKT/ParameterDefinitionList.h index 15c0cc01..716c9b81 100644 --- a/AnnService/inc/Core/BKT/ParameterDefinitionList.h +++ b/AnnService/inc/Core/BKT/ParameterDefinitionList.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifdef DefineBKTParameter // DefineBKTParameter(VarName, VarType, DefaultValue, RepresentStr) diff --git a/AnnService/inc/Core/Common.h b/AnnService/inc/Core/Common.h index 7d61675b..91c6239b 100644 --- a/AnnService/inc/Core/Common.h +++ b/AnnService/inc/Core/Common.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_CORE_COMMONDEFS_H_ #define _SPTAG_CORE_COMMONDEFS_H_ diff --git a/AnnService/inc/Core/Common/BKTree.h b/AnnService/inc/Core/Common/BKTree.h index 70140621..74b592b7 100644 --- a/AnnService/inc/Core/Common/BKTree.h +++ b/AnnService/inc/Core/Common/BKTree.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_COMMON_BKTREE_H_ #define _SPTAG_COMMON_BKTREE_H_ diff --git a/AnnService/inc/Core/Common/CommonUtils.h b/AnnService/inc/Core/Common/CommonUtils.h index f959f80c..ca5ed83f 100644 --- a/AnnService/inc/Core/Common/CommonUtils.h +++ b/AnnService/inc/Core/Common/CommonUtils.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_COMMON_COMMONUTILS_H_ #define _SPTAG_COMMON_COMMONUTILS_H_ diff --git a/AnnService/inc/Core/Common/DataUtils.h b/AnnService/inc/Core/Common/DataUtils.h index 2ecd7faa..affc4687 100644 --- a/AnnService/inc/Core/Common/DataUtils.h +++ b/AnnService/inc/Core/Common/DataUtils.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + #ifndef _SPTAG_COMMON_DATAUTILS_H_ #define _SPTAG_COMMON_DATAUTILS_H_ diff --git a/AnnService/inc/Core/Common/Dataset.h b/AnnService/inc/Core/Common/Dataset.h index fd7817f8..516d28a1 100644 --- a/AnnService/inc/Core/Common/Dataset.h +++ b/AnnService/inc/Core/Common/Dataset.h @@ -1,6 +1,9 @@ #ifndef _SPTAG_COMMON_DATASET_H_ #define _SPTAG_COMMON_DATASET_H_ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include #if defined(_MSC_VER) || defined(__INTEL_COMPILER) diff --git a/AnnService/inc/Core/Common/DistanceUtils.h b/AnnService/inc/Core/Common/DistanceUtils.h index c64e63da..75a35761 100644 --- a/AnnService/inc/Core/Common/DistanceUtils.h +++ b/AnnService/inc/Core/Common/DistanceUtils.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_COMMON_DISTANCEUTILS_H_ #define _SPTAG_COMMON_DISTANCEUTILS_H_ diff --git a/AnnService/inc/Core/Common/FineGrainedLock.h b/AnnService/inc/Core/Common/FineGrainedLock.h index e1d5dc39..d8bfbaa9 100644 --- a/AnnService/inc/Core/Common/FineGrainedLock.h +++ b/AnnService/inc/Core/Common/FineGrainedLock.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_COMMON_FINEGRAINEDLOCK_H_ #define _SPTAG_COMMON_FINEGRAINEDLOCK_H_ diff --git a/AnnService/inc/Core/Common/Heap.h b/AnnService/inc/Core/Common/Heap.h index 7d4dcc56..261aa498 100644 --- a/AnnService/inc/Core/Common/Heap.h +++ b/AnnService/inc/Core/Common/Heap.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ #ifndef _SPTAG_COMMON_HEAP_H_ #define _SPTAG_COMMON_HEAP_H_ diff --git a/AnnService/inc/Core/Common/KDTree.h b/AnnService/inc/Core/Common/KDTree.h index ab2e1779..f6a5aebb 100644 --- a/AnnService/inc/Core/Common/KDTree.h +++ b/AnnService/inc/Core/Common/KDTree.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_COMMON_KDTREE_H_ #define _SPTAG_COMMON_KDTREE_H_ diff --git a/AnnService/inc/Core/Common/NeighborhoodGraph.h b/AnnService/inc/Core/Common/NeighborhoodGraph.h index 9e646b69..3da45a1c 100644 --- a/AnnService/inc/Core/Common/NeighborhoodGraph.h +++ b/AnnService/inc/Core/Common/NeighborhoodGraph.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_COMMON_NG_H_ #define _SPTAG_COMMON_NG_H_ diff --git a/AnnService/inc/Core/Common/QueryResultSet.h b/AnnService/inc/Core/Common/QueryResultSet.h index f410b29d..52745322 100644 --- a/AnnService/inc/Core/Common/QueryResultSet.h +++ b/AnnService/inc/Core/Common/QueryResultSet.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_COMMON_QUERYRESULTSET_H_ #define _SPTAG_COMMON_QUERYRESULTSET_H_ diff --git a/AnnService/inc/Core/Common/RelativeNeighborhoodGraph.h b/AnnService/inc/Core/Common/RelativeNeighborhoodGraph.h index 83d5ee4a..49925086 100644 --- a/AnnService/inc/Core/Common/RelativeNeighborhoodGraph.h +++ b/AnnService/inc/Core/Common/RelativeNeighborhoodGraph.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ #ifndef _SPTAG_COMMON_RNG_H_ #define _SPTAG_COMMON_RNG_H_ diff --git a/AnnService/inc/Core/Common/WorkSpace.h b/AnnService/inc/Core/Common/WorkSpace.h index f2ce87a0..74d0a503 100644 --- a/AnnService/inc/Core/Common/WorkSpace.h +++ b/AnnService/inc/Core/Common/WorkSpace.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_COMMON_WORKSPACE_H_ #define _SPTAG_COMMON_WORKSPACE_H_ diff --git a/AnnService/inc/Core/Common/WorkSpacePool.h b/AnnService/inc/Core/Common/WorkSpacePool.h index e25afdc5..9a7154e6 100644 --- a/AnnService/inc/Core/Common/WorkSpacePool.h +++ b/AnnService/inc/Core/Common/WorkSpacePool.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_COMMON_WORKSPACEPOOL_H_ #define _SPTAG_COMMON_WORKSPACEPOOL_H_ diff --git a/AnnService/inc/Core/CommonDataStructure.h b/AnnService/inc/Core/CommonDataStructure.h index 7a747793..d357e975 100644 --- a/AnnService/inc/Core/CommonDataStructure.h +++ b/AnnService/inc/Core/CommonDataStructure.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_COMMONDATASTRUCTURE_H_ #define _SPTAG_COMMONDATASTRUCTURE_H_ diff --git a/AnnService/inc/Core/DefinitionList.h b/AnnService/inc/Core/DefinitionList.h index 23b1dc46..ed6f33e9 100644 --- a/AnnService/inc/Core/DefinitionList.h +++ b/AnnService/inc/Core/DefinitionList.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifdef DefineVectorValueType DefineVectorValueType(Int8, std::int8_t) diff --git a/AnnService/inc/Core/KDT/Index.h b/AnnService/inc/Core/KDT/Index.h index 5dd094e1..ce2b34b3 100644 --- a/AnnService/inc/Core/KDT/Index.h +++ b/AnnService/inc/Core/KDT/Index.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ #ifndef _SPTAG_KDT_INDEX_H_ #define _SPTAG_KDT_INDEX_H_ diff --git a/AnnService/inc/Core/KDT/ParameterDefinitionList.h b/AnnService/inc/Core/KDT/ParameterDefinitionList.h index 8ca2ef74..62dbc3a1 100644 --- a/AnnService/inc/Core/KDT/ParameterDefinitionList.h +++ b/AnnService/inc/Core/KDT/ParameterDefinitionList.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifdef DefineKDTParameter // DefineKDTParameter(VarName, VarType, DefaultValue, RepresentStr) diff --git a/AnnService/inc/Core/MetadataSet.h b/AnnService/inc/Core/MetadataSet.h index f476531e..259f7dd6 100644 --- a/AnnService/inc/Core/MetadataSet.h +++ b/AnnService/inc/Core/MetadataSet.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_METADATASET_H_ #define _SPTAG_METADATASET_H_ diff --git a/AnnService/inc/Core/SearchQuery.h b/AnnService/inc/Core/SearchQuery.h index 8b8c5f7b..ede82cc9 100644 --- a/AnnService/inc/Core/SearchQuery.h +++ b/AnnService/inc/Core/SearchQuery.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_SEARCHQUERY_H_ #define _SPTAG_SEARCHQUERY_H_ diff --git a/AnnService/inc/Core/VectorIndex.h b/AnnService/inc/Core/VectorIndex.h index cbe1b579..0156bff6 100644 --- a/AnnService/inc/Core/VectorIndex.h +++ b/AnnService/inc/Core/VectorIndex.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_VECTORINDEX_H_ #define _SPTAG_VECTORINDEX_H_ diff --git a/AnnService/inc/Core/VectorSet.h b/AnnService/inc/Core/VectorSet.h index 09a6620a..4d0727aa 100644 --- a/AnnService/inc/Core/VectorSet.h +++ b/AnnService/inc/Core/VectorSet.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ #ifndef _SPTAG_VECTORSET_H_ #define _SPTAG_VECTORSET_H_ diff --git a/AnnService/inc/Helper/ArgumentsParser.h b/AnnService/inc/Helper/ArgumentsParser.h index 8d3ed7aa..0ae19b8e 100644 --- a/AnnService/inc/Helper/ArgumentsParser.h +++ b/AnnService/inc/Helper/ArgumentsParser.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_HELPER_ARGUMENTSPARSER_H_ #define _SPTAG_HELPER_ARGUMENTSPARSER_H_ diff --git a/AnnService/inc/Helper/Base64Encode.h b/AnnService/inc/Helper/Base64Encode.h index 9dd8883e..8e791934 100644 --- a/AnnService/inc/Helper/Base64Encode.h +++ b/AnnService/inc/Helper/Base64Encode.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_HELPER_BASE64ENCODE_H_ #define _SPTAG_HELPER_BASE64ENCODE_H_ diff --git a/AnnService/inc/Helper/CommonHelper.h b/AnnService/inc/Helper/CommonHelper.h index 84ac4c8e..7f147847 100644 --- a/AnnService/inc/Helper/CommonHelper.h +++ b/AnnService/inc/Helper/CommonHelper.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_HELPER_COMMONHELPER_H_ #define _SPTAG_HELPER_COMMONHELPER_H_ diff --git a/AnnService/inc/Helper/Concurrent.h b/AnnService/inc/Helper/Concurrent.h index 77a95c90..41c9c5ff 100644 --- a/AnnService/inc/Helper/Concurrent.h +++ b/AnnService/inc/Helper/Concurrent.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_HELPER_CONCURRENT_H_ #define _SPTAG_HELPER_CONCURRENT_H_ diff --git a/AnnService/inc/Helper/SimpleIniReader.h b/AnnService/inc/Helper/SimpleIniReader.h index fd9ccd77..31b797ab 100644 --- a/AnnService/inc/Helper/SimpleIniReader.h +++ b/AnnService/inc/Helper/SimpleIniReader.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ #ifndef _SPTAG_HELPER_INIREADER_H_ #define _SPTAG_HELPER_INIREADER_H_ diff --git a/AnnService/inc/Helper/StringConvert.h b/AnnService/inc/Helper/StringConvert.h index 7aeee749..b6e53df7 100644 --- a/AnnService/inc/Helper/StringConvert.h +++ b/AnnService/inc/Helper/StringConvert.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_HELPER_STRINGCONVERTHELPER_H_ #define _SPTAG_HELPER_STRINGCONVERTHELPER_H_ diff --git a/AnnService/inc/IndexBuilder/Options.h b/AnnService/inc/IndexBuilder/Options.h index b143a9a9..7c939efa 100644 --- a/AnnService/inc/IndexBuilder/Options.h +++ b/AnnService/inc/IndexBuilder/Options.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_INDEXBUILDER_OPTIONS_H_ #define _SPTAG_INDEXBUILDER_OPTIONS_H_ diff --git a/AnnService/inc/IndexBuilder/ThreadPool.h b/AnnService/inc/IndexBuilder/ThreadPool.h index 48b90bd5..7256f71a 100644 --- a/AnnService/inc/IndexBuilder/ThreadPool.h +++ b/AnnService/inc/IndexBuilder/ThreadPool.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_INDEXBUILDER_THREADPOOL_H_ #define _SPTAG_INDEXBUILDER_THREADPOOL_H_ diff --git a/AnnService/inc/IndexBuilder/VectorSetReader.h b/AnnService/inc/IndexBuilder/VectorSetReader.h index bfdb6e17..6bb3026a 100644 --- a/AnnService/inc/IndexBuilder/VectorSetReader.h +++ b/AnnService/inc/IndexBuilder/VectorSetReader.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ #ifndef _SPTAG_INDEXBUILDER_VECTORSETREADER_H_ #define _SPTAG_INDEXBUILDER_VECTORSETREADER_H_ diff --git a/AnnService/inc/IndexBuilder/VectorSetReaders/DefaultReader.h b/AnnService/inc/IndexBuilder/VectorSetReaders/DefaultReader.h index a2102498..f12ec7f5 100644 --- a/AnnService/inc/IndexBuilder/VectorSetReaders/DefaultReader.h +++ b/AnnService/inc/IndexBuilder/VectorSetReaders/DefaultReader.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_INDEXBUILDER_VECTORSETREADERS_DEFAULTREADER_H_ #define _SPTAG_INDEXBUILDER_VECTORSETREADERS_DEFAULTREADER_H_ diff --git a/AnnService/inc/Server/QueryParser.h b/AnnService/inc/Server/QueryParser.h index 6df80733..9444e408 100644 --- a/AnnService/inc/Server/QueryParser.h +++ b/AnnService/inc/Server/QueryParser.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_SERVER_QUERYPARSER_H_ #define _SPTAG_SERVER_QUERYPARSER_H_ diff --git a/AnnService/inc/Server/SearchExecutionContext.h b/AnnService/inc/Server/SearchExecutionContext.h index 00c5eab9..cba4df46 100644 --- a/AnnService/inc/Server/SearchExecutionContext.h +++ b/AnnService/inc/Server/SearchExecutionContext.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_SERVER_SEARCHEXECUTIONCONTEXT_H_ #define _SPTAG_SERVER_SEARCHEXECUTIONCONTEXT_H_ diff --git a/AnnService/inc/Server/SearchExecutor.h b/AnnService/inc/Server/SearchExecutor.h index 73a3a15f..20183265 100644 --- a/AnnService/inc/Server/SearchExecutor.h +++ b/AnnService/inc/Server/SearchExecutor.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ #ifndef _SPTAG_SERVER_SEARCHEXECUTOR_H_ #define _SPTAG_SERVER_SEARCHEXECUTOR_H_ diff --git a/AnnService/inc/Server/SearchService.h b/AnnService/inc/Server/SearchService.h index c3290892..34d0c606 100644 --- a/AnnService/inc/Server/SearchService.h +++ b/AnnService/inc/Server/SearchService.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_SERVER_SERVICE_H_ #define _SPTAG_SERVER_SERVICE_H_ diff --git a/AnnService/inc/Server/ServiceContext.h b/AnnService/inc/Server/ServiceContext.h index b18a8154..b1a7b840 100644 --- a/AnnService/inc/Server/ServiceContext.h +++ b/AnnService/inc/Server/ServiceContext.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_SERVER_SERVICECONTEX_H_ #define _SPTAG_SERVER_SERVICECONTEX_H_ diff --git a/AnnService/inc/Server/ServiceSettings.h b/AnnService/inc/Server/ServiceSettings.h index bc51e6ba..90774873 100644 --- a/AnnService/inc/Server/ServiceSettings.h +++ b/AnnService/inc/Server/ServiceSettings.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_SERVER_SERVICESTTINGS_H_ #define _SPTAG_SERVER_SERVICESTTINGS_H_ diff --git a/AnnService/inc/Socket/Client.h b/AnnService/inc/Socket/Client.h index 4857b3a7..a57465df 100644 --- a/AnnService/inc/Socket/Client.h +++ b/AnnService/inc/Socket/Client.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_SOCKET_CLIENT_H_ #define _SPTAG_SOCKET_CLIENT_H_ diff --git a/AnnService/inc/Socket/Common.h b/AnnService/inc/Socket/Common.h index ad1a3df5..dc06af1b 100644 --- a/AnnService/inc/Socket/Common.h +++ b/AnnService/inc/Socket/Common.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ #ifndef _SPTAG_SOCKET_COMMON_H_ #define _SPTAG_SOCKET_COMMON_H_ diff --git a/AnnService/inc/Socket/Connection.h b/AnnService/inc/Socket/Connection.h index b4e3b1c3..1d75d093 100644 --- a/AnnService/inc/Socket/Connection.h +++ b/AnnService/inc/Socket/Connection.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_SOCKET_CONNECTION_H_ #define _SPTAG_SOCKET_CONNECTION_H_ diff --git a/AnnService/inc/Socket/ConnectionManager.h b/AnnService/inc/Socket/ConnectionManager.h index 8e27f3dd..e487c610 100644 --- a/AnnService/inc/Socket/ConnectionManager.h +++ b/AnnService/inc/Socket/ConnectionManager.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_SOCKET_CONNECTIONMANAGER_H_ #define _SPTAG_SOCKET_CONNECTIONMANAGER_H_ diff --git a/AnnService/inc/Socket/Packet.h b/AnnService/inc/Socket/Packet.h index e7a85c61..8c99b09f 100644 --- a/AnnService/inc/Socket/Packet.h +++ b/AnnService/inc/Socket/Packet.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_SOCKET_PACKET_H_ #define _SPTAG_SOCKET_PACKET_H_ diff --git a/AnnService/inc/Socket/RemoteSearchQuery.h b/AnnService/inc/Socket/RemoteSearchQuery.h index 40de407a..900aa6cb 100644 --- a/AnnService/inc/Socket/RemoteSearchQuery.h +++ b/AnnService/inc/Socket/RemoteSearchQuery.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_SOCKET_REMOTESEARCHQUERY_H_ #define _SPTAG_SOCKET_REMOTESEARCHQUERY_H_ diff --git a/AnnService/inc/Socket/ResourceManager.h b/AnnService/inc/Socket/ResourceManager.h index b19dad12..404cac83 100644 --- a/AnnService/inc/Socket/ResourceManager.h +++ b/AnnService/inc/Socket/ResourceManager.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + #ifndef _SPTAG_SOCKET_RESOURCEMANAGER_H_ #define _SPTAG_SOCKET_RESOURCEMANAGER_H_ diff --git a/AnnService/inc/Socket/Server.h b/AnnService/inc/Socket/Server.h index 0b67c838..aac97bf8 100644 --- a/AnnService/inc/Socket/Server.h +++ b/AnnService/inc/Socket/Server.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_SOCKET_SERVER_H_ #define _SPTAG_SOCKET_SERVER_H_ diff --git a/AnnService/inc/Socket/SimpleSerialization.h b/AnnService/inc/Socket/SimpleSerialization.h index 71a34b03..6da92562 100644 --- a/AnnService/inc/Socket/SimpleSerialization.h +++ b/AnnService/inc/Socket/SimpleSerialization.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_SOCKET_SIMPLESERIALIZATION_H_ #define _SPTAG_SOCKET_SIMPLESERIALIZATION_H_ diff --git a/AnnService/src/Aggregator/AggregatorContext.cpp b/AnnService/src/Aggregator/AggregatorContext.cpp index 229d386e..dd2f4b20 100644 --- a/AnnService/src/Aggregator/AggregatorContext.cpp +++ b/AnnService/src/Aggregator/AggregatorContext.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Aggregator/AggregatorContext.h" #include "inc/Helper/SimpleIniReader.h" diff --git a/AnnService/src/Aggregator/AggregatorExecutionContext.cpp b/AnnService/src/Aggregator/AggregatorExecutionContext.cpp index 3ca8ad1b..8f7a2837 100644 --- a/AnnService/src/Aggregator/AggregatorExecutionContext.cpp +++ b/AnnService/src/Aggregator/AggregatorExecutionContext.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ #include "inc/Aggregator/AggregatorExecutionContext.h" using namespace SPTAG; diff --git a/AnnService/src/Aggregator/AggregatorService.cpp b/AnnService/src/Aggregator/AggregatorService.cpp index b0fd8f47..89d974c9 100644 --- a/AnnService/src/Aggregator/AggregatorService.cpp +++ b/AnnService/src/Aggregator/AggregatorService.cpp @@ -1,5 +1,7 @@ -#include "inc/Aggregator/AggregatorService.h" +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +#include "inc/Aggregator/AggregatorService.h" using namespace SPTAG; using namespace SPTAG::Aggregator; diff --git a/AnnService/src/Aggregator/AggregatorSettings.cpp b/AnnService/src/Aggregator/AggregatorSettings.cpp index 55e10779..a3e2bc68 100644 --- a/AnnService/src/Aggregator/AggregatorSettings.cpp +++ b/AnnService/src/Aggregator/AggregatorSettings.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Aggregator/AggregatorSettings.h" using namespace SPTAG; diff --git a/AnnService/src/Aggregator/main.cpp b/AnnService/src/Aggregator/main.cpp index 23ad1155..2a06025d 100644 --- a/AnnService/src/Aggregator/main.cpp +++ b/AnnService/src/Aggregator/main.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Aggregator/AggregatorService.h" SPTAG::Aggregator::AggregatorService g_service; diff --git a/AnnService/src/Client/ClientWrapper.cpp b/AnnService/src/Client/ClientWrapper.cpp index f2cb89cd..7e91c631 100644 --- a/AnnService/src/Client/ClientWrapper.cpp +++ b/AnnService/src/Client/ClientWrapper.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ #include "inc/Client/ClientWrapper.h" using namespace SPTAG; diff --git a/AnnService/src/Client/Options.cpp b/AnnService/src/Client/Options.cpp index 870aac8f..bb067d3d 100644 --- a/AnnService/src/Client/Options.cpp +++ b/AnnService/src/Client/Options.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Client/Options.h" #include "inc/Helper/StringConvert.h" diff --git a/AnnService/src/Client/main.cpp b/AnnService/src/Client/main.cpp index 2b3a2f76..b15121dc 100644 --- a/AnnService/src/Client/main.cpp +++ b/AnnService/src/Client/main.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Client/Options.h" #include "inc/Client/ClientWrapper.h" diff --git a/AnnService/src/Core/BKT/BKTIndex.cpp b/AnnService/src/Core/BKT/BKTIndex.cpp index c6f3d466..ef25f30c 100644 --- a/AnnService/src/Core/BKT/BKTIndex.cpp +++ b/AnnService/src/Core/BKT/BKTIndex.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Core/BKT/Index.h" #pragma warning(disable:4996) // 'fopen': This function or variable may be unsafe. Consider using fopen_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. diff --git a/AnnService/src/Core/Common/NeighborhoodGraph.cpp b/AnnService/src/Core/Common/NeighborhoodGraph.cpp index 20ed8630..94115dd0 100644 --- a/AnnService/src/Core/Common/NeighborhoodGraph.cpp +++ b/AnnService/src/Core/Common/NeighborhoodGraph.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ #include "inc/Core/Common/NeighborhoodGraph.h" #include "inc/Core/Common/RelativeNeighborhoodGraph.h" diff --git a/AnnService/src/Core/Common/WorkSpacePool.cpp b/AnnService/src/Core/Common/WorkSpacePool.cpp index 6ac6ee88..036e281e 100644 --- a/AnnService/src/Core/Common/WorkSpacePool.cpp +++ b/AnnService/src/Core/Common/WorkSpacePool.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Core/Common/WorkSpacePool.h" using namespace SPTAG; diff --git a/AnnService/src/Core/CommonDataStructure.cpp b/AnnService/src/Core/CommonDataStructure.cpp index 719ae951..7f59e706 100644 --- a/AnnService/src/Core/CommonDataStructure.cpp +++ b/AnnService/src/Core/CommonDataStructure.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Core/CommonDataStructure.h" using namespace SPTAG; diff --git a/AnnService/src/Core/KDT/KDTIndex.cpp b/AnnService/src/Core/KDT/KDTIndex.cpp index 00bb877f..546be638 100644 --- a/AnnService/src/Core/KDT/KDTIndex.cpp +++ b/AnnService/src/Core/KDT/KDTIndex.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Core/KDT/Index.h" #pragma warning(disable:4996) // 'fopen': This function or variable may be unsafe. Consider using fopen_s instead. To disable deprecation, use _CRT_SECURE_NO_WARNINGS. See online help for details. diff --git a/AnnService/src/Core/MetadataSet.cpp b/AnnService/src/Core/MetadataSet.cpp index 999c5b47..a45da038 100644 --- a/AnnService/src/Core/MetadataSet.cpp +++ b/AnnService/src/Core/MetadataSet.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ #include "inc/Core/MetadataSet.h" #include diff --git a/AnnService/src/Core/VectorIndex.cpp b/AnnService/src/Core/VectorIndex.cpp index af6cf601..657978b7 100644 --- a/AnnService/src/Core/VectorIndex.cpp +++ b/AnnService/src/Core/VectorIndex.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Core/VectorIndex.h" #include "inc/Core/Common/DataUtils.h" #include "inc/Helper/CommonHelper.h" diff --git a/AnnService/src/Core/VectorSet.cpp b/AnnService/src/Core/VectorSet.cpp index 99f801fe..36178d62 100644 --- a/AnnService/src/Core/VectorSet.cpp +++ b/AnnService/src/Core/VectorSet.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Core/VectorSet.h" using namespace SPTAG; diff --git a/AnnService/src/Helper/ArgumentsParser.cpp b/AnnService/src/Helper/ArgumentsParser.cpp index b20df09c..4f630ec0 100644 --- a/AnnService/src/Helper/ArgumentsParser.cpp +++ b/AnnService/src/Helper/ArgumentsParser.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Helper/ArgumentsParser.h" using namespace SPTAG::Helper; diff --git a/AnnService/src/Helper/Base64Encode.cpp b/AnnService/src/Helper/Base64Encode.cpp index cafaeb33..5992fa5a 100644 --- a/AnnService/src/Helper/Base64Encode.cpp +++ b/AnnService/src/Helper/Base64Encode.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Helper/Base64Encode.h" using namespace SPTAG; diff --git a/AnnService/src/Helper/CommonHelper.cpp b/AnnService/src/Helper/CommonHelper.cpp index 4d472a95..2d4dc0de 100644 --- a/AnnService/src/Helper/CommonHelper.cpp +++ b/AnnService/src/Helper/CommonHelper.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ #include "inc/Helper/CommonHelper.h" #include diff --git a/AnnService/src/Helper/Concurrent.cpp b/AnnService/src/Helper/Concurrent.cpp index 1050d172..36988cb4 100644 --- a/AnnService/src/Helper/Concurrent.cpp +++ b/AnnService/src/Helper/Concurrent.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Helper/Concurrent.h" using namespace SPTAG; diff --git a/AnnService/src/Helper/SimpleIniReader.cpp b/AnnService/src/Helper/SimpleIniReader.cpp index 0d4a746a..7456ad9d 100644 --- a/AnnService/src/Helper/SimpleIniReader.cpp +++ b/AnnService/src/Helper/SimpleIniReader.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Helper/SimpleIniReader.h" #include "inc/Helper/CommonHelper.h" diff --git a/AnnService/src/IndexBuilder/Options.cpp b/AnnService/src/IndexBuilder/Options.cpp index cdfe7307..d0fcd0fd 100644 --- a/AnnService/src/IndexBuilder/Options.cpp +++ b/AnnService/src/IndexBuilder/Options.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/IndexBuilder/Options.h" #include "inc/Helper/StringConvert.h" diff --git a/AnnService/src/IndexBuilder/ThreadPool.cpp b/AnnService/src/IndexBuilder/ThreadPool.cpp index 0ac09d24..0ecddc12 100644 --- a/AnnService/src/IndexBuilder/ThreadPool.cpp +++ b/AnnService/src/IndexBuilder/ThreadPool.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/IndexBuilder/ThreadPool.h" #include diff --git a/AnnService/src/IndexBuilder/VectorSetReader.cpp b/AnnService/src/IndexBuilder/VectorSetReader.cpp index 40d83fa8..e50f6f5e 100644 --- a/AnnService/src/IndexBuilder/VectorSetReader.cpp +++ b/AnnService/src/IndexBuilder/VectorSetReader.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + #include "inc/IndexBuilder/VectorSetReader.h" #include "inc/IndexBuilder/VectorSetReaders/DefaultReader.h" diff --git a/AnnService/src/IndexBuilder/VectorSetReaders/DefaultReader.cpp b/AnnService/src/IndexBuilder/VectorSetReaders/DefaultReader.cpp index 30639bcb..a7259867 100644 --- a/AnnService/src/IndexBuilder/VectorSetReaders/DefaultReader.cpp +++ b/AnnService/src/IndexBuilder/VectorSetReaders/DefaultReader.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/IndexBuilder/VectorSetReaders/DefaultReader.h" #include "inc/Helper/StringConvert.h" #include "inc/Helper/CommonHelper.h" diff --git a/AnnService/src/IndexBuilder/main.cpp b/AnnService/src/IndexBuilder/main.cpp index c265d758..1f83b818 100644 --- a/AnnService/src/IndexBuilder/main.cpp +++ b/AnnService/src/IndexBuilder/main.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/IndexBuilder/ThreadPool.h" #include "inc/IndexBuilder/Options.h" #include "inc/IndexBuilder/VectorSetReader.h" diff --git a/AnnService/src/Server/QueryParser.cpp b/AnnService/src/Server/QueryParser.cpp index f4d5ccb5..0fb47e93 100644 --- a/AnnService/src/Server/QueryParser.cpp +++ b/AnnService/src/Server/QueryParser.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Server/QueryParser.h" #include diff --git a/AnnService/src/Server/SearchExecutionContext.cpp b/AnnService/src/Server/SearchExecutionContext.cpp index 1d8112c5..36ff0824 100644 --- a/AnnService/src/Server/SearchExecutionContext.cpp +++ b/AnnService/src/Server/SearchExecutionContext.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ #include "inc/Server/SearchExecutionContext.h" #include "inc/Helper/StringConvert.h" #include "inc/Helper/CommonHelper.h" diff --git a/AnnService/src/Server/SearchExecutor.cpp b/AnnService/src/Server/SearchExecutor.cpp index d89cc094..2bc3832d 100644 --- a/AnnService/src/Server/SearchExecutor.cpp +++ b/AnnService/src/Server/SearchExecutor.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Server/SearchExecutor.h" using namespace SPTAG; diff --git a/AnnService/src/Server/SearchService.cpp b/AnnService/src/Server/SearchService.cpp index fd9349d3..a85fdcdb 100644 --- a/AnnService/src/Server/SearchService.cpp +++ b/AnnService/src/Server/SearchService.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Server/SearchService.h" #include "inc/Server/SearchExecutor.h" #include "inc/Socket/RemoteSearchQuery.h" diff --git a/AnnService/src/Server/ServiceContext.cpp b/AnnService/src/Server/ServiceContext.cpp index 7af4e8bb..8d62b2c7 100644 --- a/AnnService/src/Server/ServiceContext.cpp +++ b/AnnService/src/Server/ServiceContext.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Server/ServiceContext.h" #include "inc/Helper/SimpleIniReader.h" #include "inc/Helper/CommonHelper.h" diff --git a/AnnService/src/Server/ServiceSettings.cpp b/AnnService/src/Server/ServiceSettings.cpp index d0d52e5f..d5115319 100644 --- a/AnnService/src/Server/ServiceSettings.cpp +++ b/AnnService/src/Server/ServiceSettings.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ #include "inc/Server/ServiceSettings.h" using namespace SPTAG; diff --git a/AnnService/src/Server/main.cpp b/AnnService/src/Server/main.cpp index d4afc2f1..5aa5dc1e 100644 --- a/AnnService/src/Server/main.cpp +++ b/AnnService/src/Server/main.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Server/SearchService.h" SPTAG::Service::SearchService g_service; diff --git a/AnnService/src/Socket/Client.cpp b/AnnService/src/Socket/Client.cpp index cd05d769..9c4101e4 100644 --- a/AnnService/src/Socket/Client.cpp +++ b/AnnService/src/Socket/Client.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Socket/Client.h" #include diff --git a/AnnService/src/Socket/Common.cpp b/AnnService/src/Socket/Common.cpp index c71936d4..2cfc1178 100644 --- a/AnnService/src/Socket/Common.cpp +++ b/AnnService/src/Socket/Common.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Socket/Common.h" diff --git a/AnnService/src/Socket/Connection.cpp b/AnnService/src/Socket/Connection.cpp index 18908bb1..6e536cbf 100644 --- a/AnnService/src/Socket/Connection.cpp +++ b/AnnService/src/Socket/Connection.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Socket/Connection.h" #include "inc/Socket/ConnectionManager.h" diff --git a/AnnService/src/Socket/ConnectionManager.cpp b/AnnService/src/Socket/ConnectionManager.cpp index ea3906a8..eeb26c71 100644 --- a/AnnService/src/Socket/ConnectionManager.cpp +++ b/AnnService/src/Socket/ConnectionManager.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ #include "inc/Socket/ConnectionManager.h" using namespace SPTAG::Socket; diff --git a/AnnService/src/Socket/Packet.cpp b/AnnService/src/Socket/Packet.cpp index d6596c5a..335400bb 100644 --- a/AnnService/src/Socket/Packet.cpp +++ b/AnnService/src/Socket/Packet.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Socket/Packet.h" #include "inc/Socket/SimpleSerialization.h" diff --git a/AnnService/src/Socket/RemoteSearchQuery.cpp b/AnnService/src/Socket/RemoteSearchQuery.cpp index ab1f7285..2cb45032 100644 --- a/AnnService/src/Socket/RemoteSearchQuery.cpp +++ b/AnnService/src/Socket/RemoteSearchQuery.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Socket/RemoteSearchQuery.h" #include "inc/Socket/SimpleSerialization.h" diff --git a/AnnService/src/Socket/Server.cpp b/AnnService/src/Socket/Server.cpp index 094de3de..86d60040 100644 --- a/AnnService/src/Socket/Server.cpp +++ b/AnnService/src/Socket/Server.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Socket/Server.h" #include diff --git a/CMakeLists.txt b/CMakeLists.txt index 617dd76f..cc5a893f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,7 @@ -cmake_minimum_required (VERSION 2.8) +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +cmake_minimum_required (VERSION 3.12) project (SPTAGLib) diff --git a/PythonWrapper/CMakeLists.txt b/PythonWrapper/CMakeLists.txt index 261c4181..9aa134b1 100644 --- a/PythonWrapper/CMakeLists.txt +++ b/PythonWrapper/CMakeLists.txt @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ find_package(Python2 COMPONENTS Development) if (Python2_FOUND) include_directories (${Python2_INCLUDE_DIRS}) diff --git a/PythonWrapper/inc/ClientInterface.h b/PythonWrapper/inc/ClientInterface.h index e13a0d25..210c9443 100644 --- a/PythonWrapper/inc/ClientInterface.h +++ b/PythonWrapper/inc/ClientInterface.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_PW_CLIENTINTERFACE_H_ #define _SPTAG_PW_CLIENTINTERFACE_H_ diff --git a/PythonWrapper/inc/CoreInterface.h b/PythonWrapper/inc/CoreInterface.h index 903cfe05..a62fad4c 100644 --- a/PythonWrapper/inc/CoreInterface.h +++ b/PythonWrapper/inc/CoreInterface.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_PW_COREINTERFACE_H_ #define _SPTAG_PW_COREINTERFACE_H_ diff --git a/PythonWrapper/inc/TransferDataType.h b/PythonWrapper/inc/TransferDataType.h index 12f81c6e..51ef9614 100644 --- a/PythonWrapper/inc/TransferDataType.h +++ b/PythonWrapper/inc/TransferDataType.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #ifndef _SPTAG_PW_TRANSFERDATATYPE_H_ #define _SPTAG_PW_TRANSFERDATATYPE_H_ diff --git a/PythonWrapper/src/ClientInterface.cpp b/PythonWrapper/src/ClientInterface.cpp index 16358a43..eeff8f45 100644 --- a/PythonWrapper/src/ClientInterface.cpp +++ b/PythonWrapper/src/ClientInterface.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/ClientInterface.h" #include "inc/Helper/CommonHelper.h" #include "inc/Helper/Concurrent.h" diff --git a/PythonWrapper/src/CoreInterface.cpp b/PythonWrapper/src/CoreInterface.cpp index fa14e415..1587e9ef 100644 --- a/PythonWrapper/src/CoreInterface.cpp +++ b/PythonWrapper/src/CoreInterface.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + #include "inc/CoreInterface.h" #include "inc/Helper/StringConvert.h" diff --git a/Search/CMakeLists.txt b/Search/CMakeLists.txt index acfa0b38..4fe79d68 100644 --- a/Search/CMakeLists.txt +++ b/Search/CMakeLists.txt @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + file(GLOB HDR_FILES ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/Common/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/BKT/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/KDT/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Helper/*.h) file(GLOB SRC_FILES ${PROJECT_SOURCE_DIR}/AnnService/src/Core/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/Common/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/BKT/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/KDT/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Helper/*.cpp) diff --git a/Search/main.cpp b/Search/main.cpp index 7cd28b60..f6dd4f5b 100644 --- a/Search/main.cpp +++ b/Search/main.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Helper/SimpleIniReader.h" #include "inc/Helper/CommonHelper.h" #include "inc/Core/Common.h" diff --git a/Test/CMakeLists.txt b/Test/CMakeLists.txt index 93a9f1b8..e1179631 100644 --- a/Test/CMakeLists.txt +++ b/Test/CMakeLists.txt @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + if(NOT WIN32) ADD_DEFINITIONS(-DBOOST_TEST_DYN_LINK) message (STATUS "BOOST_TEST_DYN_LINK") diff --git a/Test/inc/Test.h b/Test/inc/Test.h index be44ae45..da6c096b 100644 --- a/Test/inc/Test.h +++ b/Test/inc/Test.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ #pragma once #include diff --git a/Test/src/AlgoTest.cpp b/Test/src/AlgoTest.cpp index 1999d349..5a4d24c2 100644 --- a/Test/src/AlgoTest.cpp +++ b/Test/src/AlgoTest.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Test.h" #include "inc/Helper/SimpleIniReader.h" #include "inc/Core/VectorIndex.h" diff --git a/Test/src/Base64HelperTest.cpp b/Test/src/Base64HelperTest.cpp index 57fec601..2ead4753 100644 --- a/Test/src/Base64HelperTest.cpp +++ b/Test/src/Base64HelperTest.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Test.h" #include "inc/Helper/Base64Encode.h" diff --git a/Test/src/CommonHelperTest.cpp b/Test/src/CommonHelperTest.cpp index 581fa53b..17015642 100644 --- a/Test/src/CommonHelperTest.cpp +++ b/Test/src/CommonHelperTest.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Test.h" #include "inc/Helper/CommonHelper.h" diff --git a/Test/src/DistanceTest.cpp b/Test/src/DistanceTest.cpp index 548dd32e..26c5cee3 100644 --- a/Test/src/DistanceTest.cpp +++ b/Test/src/DistanceTest.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include #include "inc/Test.h" #include "inc/Core/Common/DistanceUtils.h" diff --git a/Test/src/IniReaderTest.cpp b/Test/src/IniReaderTest.cpp index 6f060f4d..c5dd0baa 100644 --- a/Test/src/IniReaderTest.cpp +++ b/Test/src/IniReaderTest.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ #include "inc/Test.h" #include "inc/Helper/SimpleIniReader.h" diff --git a/Test/src/StringConvertTest.cpp b/Test/src/StringConvertTest.cpp index 82903e03..fa457deb 100644 --- a/Test/src/StringConvertTest.cpp +++ b/Test/src/StringConvertTest.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #include "inc/Test.h" #include "inc/Helper/StringConvert.h" diff --git a/Test/src/main.cpp b/Test/src/main.cpp index 91444f66..7bf61ea1 100644 --- a/Test/src/main.cpp +++ b/Test/src/main.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + #define BOOST_TEST_MAIN #define BOOST_TEST_MODULE Main #include "inc/Test.h" From ba2f20c4914e42bf0c389212b0aa6553e23e424c Mon Sep 17 00:00:00 2001 From: cheqi Date: Thu, 17 Jan 2019 11:00:55 +0800 Subject: [PATCH 06/14] fix copy right in dataset --- AnnService/inc/Core/Common/Dataset.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/AnnService/inc/Core/Common/Dataset.h b/AnnService/inc/Core/Common/Dataset.h index 516d28a1..b5ebbf34 100644 --- a/AnnService/inc/Core/Common/Dataset.h +++ b/AnnService/inc/Core/Common/Dataset.h @@ -1,9 +1,9 @@ -#ifndef _SPTAG_COMMON_DATASET_H_ -#define _SPTAG_COMMON_DATASET_H_ - // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
+#ifndef _SPTAG_COMMON_DATASET_H_ +#define _SPTAG_COMMON_DATASET_H_ + #include #if defined(_MSC_VER) || defined(__INTEL_COMPILER) From b35fd8dde5eb05f6435d33c6ad0ea64a1216ef34 Mon Sep 17 00:00:00 2001 From: cheqi Date: Thu, 17 Jan 2019 11:10:46 +0800 Subject: [PATCH 07/14] change kdt distance judgement --- AnnService/src/Core/KDT/KDTIndex.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/AnnService/src/Core/KDT/KDTIndex.cpp b/AnnService/src/Core/KDT/KDTIndex.cpp index 546be638..d0ed6f50 100644 --- a/AnnService/src/Core/KDT/KDTIndex.cpp +++ b/AnnService/src/Core/KDT/KDTIndex.cpp @@ -57,13 +57,14 @@ namespace SPTAG } \ for (int i = 0; i < m_pGraph.m_iNeighborhoodSize; i++) \ _mm_prefetch((const char *)(m_pSamples)[node[i]], _MM_HINT_T0); \ + float upperBound = max(p_query.worstDist(), gnode.distance); \ bool bLocalOpt = true; \ for (int i = 0; i < m_pGraph.m_iNeighborhoodSize; i++) { \ int nn_index = node[i]; \ if (nn_index < 0) break; \ if (p_space.CheckAndSet(nn_index)) continue; \ float distance2leaf = m_fComputeDistance(p_query.GetTarget(), (m_pSamples)[nn_index], GetFeatureDim()); \ - if (distance2leaf <= p_query.worstDist() || distance2leaf < gnode.distance) bLocalOpt = false; \ + if (distance2leaf <= upperBound) bLocalOpt = false; \ p_space.m_iNumberOfCheckedLeaves++; \ p_space.m_NGQueue.insert(COMMON::HeapCell(nn_index, distance2leaf)); \ } \ From 92c5a8357cd2d6f1f3d9a32a7c0defe31cccd1c9 Mon Sep 17 00:00:00 2001 From: cheqi Date: Wed, 20 Feb 2019 21:36:23 +0800 Subject: [PATCH 08/14] change code structure and add more wrappers --- .gitignore | 16 ++ AnnService.users.props | 2 + AnnService/CMakeLists.txt | 7 +- .../IndexSearcher.vcxproj | 6 +- .../IndexSearcher.vcxproj.filters | 2 +- AnnService/inc/Core/CommonDataStructure.h | 4 + AnnService/src/Core/CommonDataStructure.cpp | 14 ++ .../src/IndexSearcher}/main.cpp | 0 CMakeLists.txt | 24 ++- PythonWrapper/CMakeLists.txt | 58 ------ SPTAG.sln | 56 +++++- Search/CMakeLists.txt | 17 -- 
Search/packages.config | 12 -- Wrappers/CMakeLists.txt | 105 ++++++++++ Wrappers/JavaClient.vcxproj | 190 ++++++++++++++++++ Wrappers/JavaClient.vcxproj.filters | 38 ++++ Wrappers/JavaCore.vcxproj | 137 +++++++++++++ Wrappers/JavaCore.vcxproj.filters | 38 ++++ .../PythonClient.vcxproj | 6 +- .../PythonClient.vcxproj.filters | 4 +- .../PythonCore.vcxproj | 10 +- .../PythonCore.vcxproj.filters | 5 +- Wrappers/PythonCore.vcxproj.user | 4 + .../inc/ClientInterface.h | 30 +-- .../inc/CoreInterface.h | 45 +---- Wrappers/inc/JavaClient.i | 16 ++ Wrappers/inc/JavaCommon.i | 17 ++ Wrappers/inc/JavaCore.i | 16 ++ Wrappers/inc/PythonClient.i | 16 ++ .../inc/PythonCommon.i | 4 +- Wrappers/inc/PythonCore.i | 16 ++ .../inc/TransferDataType.h | 2 - {PythonWrapper => Wrappers}/packages.config | 0 .../src/ClientInterface.cpp | 10 +- .../src/CoreInterface.cpp | 26 +-- 35 files changed, 750 insertions(+), 203 deletions(-) rename Search/Search.vcxproj => AnnService/IndexSearcher.vcxproj (98%) rename Search/Search.vcxproj.filters => AnnService/IndexSearcher.vcxproj.filters (94%) rename {Search => AnnService/src/IndexSearcher}/main.cpp (100%) delete mode 100644 PythonWrapper/CMakeLists.txt delete mode 100644 Search/CMakeLists.txt delete mode 100644 Search/packages.config create mode 100644 Wrappers/CMakeLists.txt create mode 100644 Wrappers/JavaClient.vcxproj create mode 100644 Wrappers/JavaClient.vcxproj.filters create mode 100644 Wrappers/JavaCore.vcxproj create mode 100644 Wrappers/JavaCore.vcxproj.filters rename {PythonWrapper => Wrappers}/PythonClient.vcxproj (97%) rename {PythonWrapper => Wrappers}/PythonClient.vcxproj.filters (92%) rename {PythonWrapper => Wrappers}/PythonCore.vcxproj (92%) rename {PythonWrapper => Wrappers}/PythonCore.vcxproj.filters (89%) create mode 100644 Wrappers/PythonCore.vcxproj.user rename {PythonWrapper => Wrappers}/inc/ClientInterface.h (72%) rename {PythonWrapper => Wrappers}/inc/CoreInterface.h (51%) create mode 100644 Wrappers/inc/JavaClient.i 
create mode 100644 Wrappers/inc/JavaCommon.i create mode 100644 Wrappers/inc/JavaCore.i create mode 100644 Wrappers/inc/PythonClient.i rename PythonWrapper/inc/PyByteArray.i => Wrappers/inc/PythonCommon.i (95%) create mode 100644 Wrappers/inc/PythonCore.i rename {PythonWrapper => Wrappers}/inc/TransferDataType.h (92%) rename {PythonWrapper => Wrappers}/packages.config (100%) rename {PythonWrapper => Wrappers}/src/ClientInterface.cpp (95%) rename {PythonWrapper => Wrappers}/src/CoreInterface.cpp (89%) diff --git a/.gitignore b/.gitignore index 57fdf2db..f87ba727 100644 --- a/.gitignore +++ b/.gitignore @@ -72,3 +72,19 @@ dkms.conf /x64/Debug /packages /Search/Search.vcxproj.user +/AnnService/IndexSearcher.vcxproj.user +/Wrappers/inc/SWIGTYPE_p_RemoteSearchResult.java +/Wrappers/inc/SWIGTYPE_p_QueryResult.java +/Wrappers/inc/SPTAGJNI.java +/Wrappers/inc/SPTAGClientJNI.java +/Wrappers/inc/SPTAGClient.py +/Wrappers/inc/SPTAGClient.java +/Wrappers/inc/SPTAG.py +/Wrappers/inc/SPTAG.java +/Wrappers/inc/CoreInterface_pwrap.cpp +/Wrappers/inc/CoreInterface_jwrap.cpp +/Wrappers/inc/ClientInterface_pwrap.cpp +/Wrappers/inc/ClientInterface_jwrap.cpp +/Wrappers/inc/AnnIndex.java +/Wrappers/inc/AnnClient.java +/AnnService.users - Copy.props diff --git a/AnnService.users.props b/AnnService.users.props index 0f009098..e6523135 100644 --- a/AnnService.users.props +++ b/AnnService.users.props @@ -11,6 +11,8 @@ $(SolutionDir)\$(Platform)\$(Configuration)\ $(SolutionDir)\$(Platform)\$(Configuration)\ + + diff --git a/AnnService/CMakeLists.txt b/AnnService/CMakeLists.txt index 3c65409e..3ab88e8c 100644 --- a/AnnService/CMakeLists.txt +++ b/AnnService/CMakeLists.txt @@ -7,6 +7,7 @@ file(GLOB SRC_FILES ${PROJECT_SOURCE_DIR}/AnnService/src/Core/*.cpp ${PROJECT_SO include_directories(${PROJECT_SOURCE_DIR}/AnnService) add_library (SPTAGLib SHARED ${SRC_FILES} ${HDR_FILES}) +target_link_libraries (SPTAGLib ${TBB_LIBRARIES}) add_library (SPTAGLibStatic STATIC ${SRC_FILES} ${HDR_FILES}) 
set_target_properties(SPTAGLibStatic PROPERTIES OUTPUT_NAME SPTAGLib) @@ -30,7 +31,11 @@ file(GLOB BUILDER_FILES ${SRC_FILES} ${PROJECT_SOURCE_DIR}/AnnService/src/IndexB add_executable (indexbuilder ${BUILDER_FILES} ${BUILDER_HDR_FILES}) target_link_libraries(indexbuilder ${Boost_LIBRARIES} ${TBB_LIBRARIES}) -install(TARGETS SPTAGLib SPTAGLibStatic server client aggregator indexbuilder +file(GLOB SEARCHER_FILES ${SRC_FILES} ${PROJECT_SOURCE_DIR}/AnnService/src/IndexSearcher/*.cpp) +add_executable (indexsearcher ${SEARCHER_FILES} ${HDR_FILES}) +target_link_libraries(indexsearcher ${Boost_LIBRARIES} ${TBB_LIBRARIES}) + +install(TARGETS SPTAGLib SPTAGLibStatic server client aggregator indexbuilder indexsearcher RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib) diff --git a/Search/Search.vcxproj b/AnnService/IndexSearcher.vcxproj similarity index 98% rename from Search/Search.vcxproj rename to AnnService/IndexSearcher.vcxproj index 8d2d1cec..c64c6e1a 100644 --- a/Search/Search.vcxproj +++ b/AnnService/IndexSearcher.vcxproj @@ -21,9 +21,9 @@ 15.0 {97615D3B-9FA0-469E-B229-95A91A5087E0} - Search + IndexSearcher 8.1 - Search + IndexSearcher @@ -136,7 +136,7 @@ - + diff --git a/Search/Search.vcxproj.filters b/AnnService/IndexSearcher.vcxproj.filters similarity index 94% rename from Search/Search.vcxproj.filters rename to AnnService/IndexSearcher.vcxproj.filters index 32a81453..82f7700c 100644 --- a/Search/Search.vcxproj.filters +++ b/AnnService/IndexSearcher.vcxproj.filters @@ -15,7 +15,7 @@ - + Source Files diff --git a/AnnService/inc/Core/CommonDataStructure.h b/AnnService/inc/Core/CommonDataStructure.h index d357e975..9fe2d720 100644 --- a/AnnService/inc/Core/CommonDataStructure.h +++ b/AnnService/inc/Core/CommonDataStructure.h @@ -32,8 +32,12 @@ class ByteArray std::uint8_t* Data() const; + void SetData(std::uint8_t* p_array); + std::size_t Length() const; + void SetLength(std::size_t p_length); + std::shared_ptr DataHolder() const; void Clear(); 
diff --git a/AnnService/src/Core/CommonDataStructure.cpp b/AnnService/src/Core/CommonDataStructure.cpp index 7f59e706..8e872b0c 100644 --- a/AnnService/src/Core/CommonDataStructure.cpp +++ b/AnnService/src/Core/CommonDataStructure.cpp @@ -101,6 +101,13 @@ ByteArray::Data() const } +void +ByteArray::SetData(std::uint8_t* p_array) +{ + m_data = p_array; +} + + std::size_t ByteArray::Length() const { @@ -108,6 +115,13 @@ ByteArray::Length() const } +void +ByteArray::SetLength(std::size_t p_length) +{ + m_length = p_length; +} + + std::shared_ptr ByteArray::DataHolder() const { diff --git a/Search/main.cpp b/AnnService/src/IndexSearcher/main.cpp similarity index 100% rename from Search/main.cpp rename to AnnService/src/IndexSearcher/main.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index cc5a893f..12b9bba1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,15 +86,29 @@ else() message (FATAL_ERROR "Could not find Boost 1.67!") endif() -find_library(TBB_LIBRARIES libtbb${CMAKE_SHARED_LIBRARY_SUFFIX}) -if (TBB_LIBRARIES) +if (WIN32) + if (${CMAKE_SIZEOF_VOID_P} EQUAL "8") + set (TBB_LIBRARY_SUFFIX "lib/intel64/vc14") + else() + set (TBB_LIBRARY_SUFFIX "lib/ia32/vc14") + endif() + + find_path(TBB_INCLUDE_DIRS tbb/tbb.h HINTS ${TBB_DIR} "C:/Program Files/Intel/TBB" PATH_SUFFIXES include) + find_library(TBB_LIBRARIES tbb${CMAKE_STATIC_LIBRARY_SUFFIX} HINTS ${TBB_DIR} "C:/Program Files/Intel/TBB" PATH_SUFFIXES ${TBB_LIBRARY_SUFFIX}) +else() + find_path(TBB_INCLUDE_DIRS tbb/tbb.h HINTS ${TBB_DIR} "/usr/" PATH_SUFFIXES include) + find_library(TBB_LIBRARIES libtbb${CMAKE_SHARED_LIBRARY_SUFFIX} HINTS ${TBB_DIR} "/usr/") +endif() + +if (TBB_INCLUDE_DIRS AND TBB_LIBRARIES) + include_directories (${TBB_INCLUDE_DIRS}) message (STATUS "Found TBB.") - message (STATUS "Library: ${TBB_LIBRARIES}") + message (STATUS "Include Path:" ${TBB_INCLUDE_DIRS}) + message (STATUS "Library:" ${TBB_LIBRARIES}) else() message (FATAL_ERROR "Could not find TBB!") endif() add_subdirectory 
(AnnService) -add_subdirectory (PythonWrapper) +add_subdirectory (Wrappers) add_subdirectory (Test) -add_subdirectory (Search) diff --git a/PythonWrapper/CMakeLists.txt b/PythonWrapper/CMakeLists.txt deleted file mode 100644 index 9aa134b1..00000000 --- a/PythonWrapper/CMakeLists.txt +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. - -find_package(Python2 COMPONENTS Development) -if (Python2_FOUND) - include_directories (${Python2_INCLUDE_DIRS}) - link_directories (${Python2_LIBRARY_DIRS}) - message (STATUS "Found Python.") - message (STATUS "Include Path: ${Python2_INCLUDE_DIRS}") - message (STATUS "Library Path: ${Python2_LIBRARIES}") - set (Python_LIBRARIES ${Python2_LIBRARIES}) -else() - message (STATUS "Could not find Python 2.7!") - find_package(Python3 COMPONENTS Development) - if (Python3_FOUND) - include_directories (${Python3_INCLUDE_DIRS}) - link_directories (${Python3_LIBRARY_DIRS}) - message (STATUS "Found Python.") - message (STATUS "Include Path: ${Python3_INCLUDE_DIRS}") - message (STATUS "Library Path: ${Python3_LIBRARIES}") - set (Python_LIBRARIES ${Python3_LIBRARIES}) - else () - message (FATAL_ERROR "Could not find python2 or python3!") - endif() -endif() - -if (WIN32) - set(PY_SUFFIX .pyd) -else() - set(PY_SUFFIX .so) -endif() - -execute_process(COMMAND swig -l${PROJECT_SOURCE_DIR}/PythonWrapper/inc/PyByteArray.i -python -c++ ${PROJECT_SOURCE_DIR}/PythonWrapper/inc/CoreInterface.h) -execute_process(COMMAND swig -l${PROJECT_SOURCE_DIR}/PythonWrapper/inc/PyByteArray.i -python -c++ ${PROJECT_SOURCE_DIR}/PythonWrapper/inc/ClientInterface.h) - -include_directories(${PYTHON_INCLUDE_PATH} ${PROJECT_SOURCE_DIR}/AnnService ${PROJECT_SOURCE_DIR}/PythonWrapper) -set(CMAKE_SHARED_LIBRARY_PREFIX "") - -file(GLOB CORE_HDR_FILES ${PROJECT_SOURCE_DIR}/PythonWrapper/inc/CoreInterface.h) -file(GLOB CORE_SRC_FILES ${PROJECT_SOURCE_DIR}/PythonWrapper/src/CoreInterface.cpp 
${PROJECT_SOURCE_DIR}/PythonWrapper/inc/CoreInterface_wrap.cxx) -add_library (_SPTAG SHARED ${CORE_SRC_FILES} ${CORE_HDR_FILES}) -set_target_properties(_SPTAG PROPERTIES SUFFIX ${PY_SUFFIX}) -target_link_libraries(_SPTAG SPTAGLib ${Python_LIBRARIES} ${TBB_LIBRARIES}) -add_custom_command(TARGET _SPTAG POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/PythonWrapper/inc/SPTAG.py ${EXECUTABLE_OUTPUT_PATH}) - -file(GLOB CLIENT_HDR_FILES ${PROJECT_SOURCE_DIR}/PythonWrapper/inc/ClientInterface.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Client/*.h) -file(GLOB CLIENT_SRC_FILES ${PROJECT_SOURCE_DIR}/PythonWrapper/src/ClientInterface.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Client/*.cpp ${PROJECT_SOURCE_DIR}/PythonWrapper/inc/ClientInterface_wrap.cxx) -add_library (_SPTAGClient SHARED ${CLIENT_SRC_FILES} ${CLIENT_HDR_FILES}) -set_target_properties(_SPTAGClient PROPERTIES SUFFIX ${PY_SUFFIX}) -target_link_libraries(_SPTAGClient SPTAGLib ${Python_LIBRARIES} ${Boost_LIBRARIES} ${TBB_LIBRARIES}) -add_custom_command(TARGET _SPTAGClient POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/PythonWrapper/inc/SPTAGClient.py ${EXECUTABLE_OUTPUT_PATH}) - -install(TARGETS _SPTAG _SPTAGClient - RUNTIME DESTINATION bin - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib) - -install(FILES ${PROJECT_SOURCE_DIR}/PythonWrapper/src/SPTAG.py ${PROJECT_SOURCE_DIR}/PythonWrapper/inc/SPTAGClient.py DESTINATION bin) diff --git a/SPTAG.sln b/SPTAG.sln index a63464d9..09c1564f 100644 --- a/SPTAG.sln +++ b/SPTAG.sln @@ -11,7 +11,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Server", "AnnService\Server {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} = {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} EndProjectSection EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PythonCore", "PythonWrapper\PythonCore.vcxproj", "{AF31947C-0495-42FE-A1AD-8F0DA2A679C7}" 
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PythonCore", "Wrappers\PythonCore.vcxproj", "{AF31947C-0495-42FE-A1AD-8F0DA2A679C7}" ProjectSection(ProjectDependencies) = postProject {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} = {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} EndProjectSection @@ -33,7 +33,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Aggregator", "AnnService\Ag {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} = {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} EndProjectSection EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PythonClient", "PythonWrapper\PythonClient.vcxproj", "{9B014CF6-E3FB-4BD4-B3B1-D26297BB31AA}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PythonClient", "Wrappers\PythonClient.vcxproj", "{9B014CF6-E3FB-4BD4-B3B1-D26297BB31AA}" ProjectSection(ProjectDependencies) = postProject {F9A72303-6381-4C80-86FF-606A2F6F7B96} = {F9A72303-6381-4C80-86FF-606A2F6F7B96} {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} = {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} @@ -44,9 +44,27 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "IndexBuilder", "AnnService\ {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} = {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} EndProjectSection EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "IndexSearcher", "AnnService\IndexSearcher.vcxproj", "{97615D3B-9FA0-469E-B229-95A91A5087E0}" + ProjectSection(ProjectDependencies) = postProject + {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} = {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} + EndProjectSection +EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Test", "Test\Test.vcxproj", "{29A25655-CCF2-47F8-8BC8-DFE1B5CF993C}" + ProjectSection(ProjectDependencies) = postProject + {F9A72303-6381-4C80-86FF-606A2F6F7B96} = {F9A72303-6381-4C80-86FF-606A2F6F7B96} + {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} = {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} + EndProjectSection EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Search", "Search\Search.vcxproj", 
"{97615D3B-9FA0-469E-B229-95A91A5087E0}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "JavaCore", "Wrappers\JavaCore.vcxproj", "{93FEB26B-965E-4157-8BE5-052F5CA112BB}" + ProjectSection(ProjectDependencies) = postProject + {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} = {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "JavaClient", "Wrappers\JavaClient.vcxproj", "{8866BF98-AA2E-450F-9F33-083E007CCA74}" + ProjectSection(ProjectDependencies) = postProject + {F9A72303-6381-4C80-86FF-606A2F6F7B96} = {F9A72303-6381-4C80-86FF-606A2F6F7B96} + {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} = {C2BC5FDE-C853-4F3D-B7E4-2C9B5524DDF9} + EndProjectSection EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -120,14 +138,6 @@ Global {F492F794-E78B-4B1F-A556-5E045B9163D5}.Release|x64.Build.0 = Release|x64 {F492F794-E78B-4B1F-A556-5E045B9163D5}.Release|x86.ActiveCfg = Release|Win32 {F492F794-E78B-4B1F-A556-5E045B9163D5}.Release|x86.Build.0 = Release|Win32 - {29A25655-CCF2-47F8-8BC8-DFE1B5CF993C}.Debug|x64.ActiveCfg = Debug|x64 - {29A25655-CCF2-47F8-8BC8-DFE1B5CF993C}.Debug|x64.Build.0 = Debug|x64 - {29A25655-CCF2-47F8-8BC8-DFE1B5CF993C}.Debug|x86.ActiveCfg = Debug|Win32 - {29A25655-CCF2-47F8-8BC8-DFE1B5CF993C}.Debug|x86.Build.0 = Debug|Win32 - {29A25655-CCF2-47F8-8BC8-DFE1B5CF993C}.Release|x64.ActiveCfg = Release|x64 - {29A25655-CCF2-47F8-8BC8-DFE1B5CF993C}.Release|x64.Build.0 = Release|x64 - {29A25655-CCF2-47F8-8BC8-DFE1B5CF993C}.Release|x86.ActiveCfg = Release|Win32 - {29A25655-CCF2-47F8-8BC8-DFE1B5CF993C}.Release|x86.Build.0 = Release|Win32 {97615D3B-9FA0-469E-B229-95A91A5087E0}.Debug|x64.ActiveCfg = Debug|x64 {97615D3B-9FA0-469E-B229-95A91A5087E0}.Debug|x64.Build.0 = Debug|x64 {97615D3B-9FA0-469E-B229-95A91A5087E0}.Debug|x86.ActiveCfg = Debug|Win32 @@ -136,6 +146,30 @@ Global {97615D3B-9FA0-469E-B229-95A91A5087E0}.Release|x64.Build.0 = Release|x64 
{97615D3B-9FA0-469E-B229-95A91A5087E0}.Release|x86.ActiveCfg = Release|Win32 {97615D3B-9FA0-469E-B229-95A91A5087E0}.Release|x86.Build.0 = Release|Win32 + {29A25655-CCF2-47F8-8BC8-DFE1B5CF993C}.Debug|x64.ActiveCfg = Debug|x64 + {29A25655-CCF2-47F8-8BC8-DFE1B5CF993C}.Debug|x64.Build.0 = Debug|x64 + {29A25655-CCF2-47F8-8BC8-DFE1B5CF993C}.Debug|x86.ActiveCfg = Debug|Win32 + {29A25655-CCF2-47F8-8BC8-DFE1B5CF993C}.Debug|x86.Build.0 = Debug|Win32 + {29A25655-CCF2-47F8-8BC8-DFE1B5CF993C}.Release|x64.ActiveCfg = Release|x64 + {29A25655-CCF2-47F8-8BC8-DFE1B5CF993C}.Release|x64.Build.0 = Release|x64 + {29A25655-CCF2-47F8-8BC8-DFE1B5CF993C}.Release|x86.ActiveCfg = Release|Win32 + {29A25655-CCF2-47F8-8BC8-DFE1B5CF993C}.Release|x86.Build.0 = Release|Win32 + {93FEB26B-965E-4157-8BE5-052F5CA112BB}.Debug|x64.ActiveCfg = Debug|x64 + {93FEB26B-965E-4157-8BE5-052F5CA112BB}.Debug|x64.Build.0 = Debug|x64 + {93FEB26B-965E-4157-8BE5-052F5CA112BB}.Debug|x86.ActiveCfg = Debug|Win32 + {93FEB26B-965E-4157-8BE5-052F5CA112BB}.Debug|x86.Build.0 = Debug|Win32 + {93FEB26B-965E-4157-8BE5-052F5CA112BB}.Release|x64.ActiveCfg = Release|x64 + {93FEB26B-965E-4157-8BE5-052F5CA112BB}.Release|x64.Build.0 = Release|x64 + {93FEB26B-965E-4157-8BE5-052F5CA112BB}.Release|x86.ActiveCfg = Release|Win32 + {93FEB26B-965E-4157-8BE5-052F5CA112BB}.Release|x86.Build.0 = Release|Win32 + {8866BF98-AA2E-450F-9F33-083E007CCA74}.Debug|x64.ActiveCfg = Debug|x64 + {8866BF98-AA2E-450F-9F33-083E007CCA74}.Debug|x64.Build.0 = Debug|x64 + {8866BF98-AA2E-450F-9F33-083E007CCA74}.Debug|x86.ActiveCfg = Debug|Win32 + {8866BF98-AA2E-450F-9F33-083E007CCA74}.Debug|x86.Build.0 = Debug|Win32 + {8866BF98-AA2E-450F-9F33-083E007CCA74}.Release|x64.ActiveCfg = Release|x64 + {8866BF98-AA2E-450F-9F33-083E007CCA74}.Release|x64.Build.0 = Release|x64 + {8866BF98-AA2E-450F-9F33-083E007CCA74}.Release|x86.ActiveCfg = Release|Win32 + {8866BF98-AA2E-450F-9F33-083E007CCA74}.Release|x86.Build.0 = Release|Win32 EndGlobalSection 
GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/Search/CMakeLists.txt b/Search/CMakeLists.txt deleted file mode 100644 index 4fe79d68..00000000 --- a/Search/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. - -file(GLOB HDR_FILES ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/Common/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/BKT/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Core/KDT/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Helper/*.h) -file(GLOB SRC_FILES ${PROJECT_SOURCE_DIR}/AnnService/src/Core/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/Common/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/BKT/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Core/KDT/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Helper/*.cpp) - -include_directories(${PROJECT_SOURCE_DIR}/AnnService) - -file(GLOB SEARCH_FILES ${SRC_FILES} ${PROJECT_SOURCE_DIR}/Search/*.cpp) -add_executable (search ${SEARCH_FILES} ${HDR_FILES}) -target_link_libraries(search ${Boost_LIBRARIES} ${TBB_LIBRARIES}) - -install(TARGETS search - RUNTIME DESTINATION bin - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib) - diff --git a/Search/packages.config b/Search/packages.config deleted file mode 100644 index 424245f6..00000000 --- a/Search/packages.config +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff --git a/Wrappers/CMakeLists.txt b/Wrappers/CMakeLists.txt new file mode 100644 index 00000000..e00ef462 --- /dev/null +++ b/Wrappers/CMakeLists.txt @@ -0,0 +1,105 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +find_package(Python2 COMPONENTS Development) +if (Python2_FOUND) + include_directories (${Python2_INCLUDE_DIRS}) + link_directories (${Python2_LIBRARY_DIRS}) + set (Python_INCLUDE_DIRS ${Python2_INCLUDE_DIRS}) + set (Python_LIBRARIES ${Python2_LIBRARIES}) + set (Python_FOUND true) +else() + find_package(Python3 COMPONENTS Development) + if (Python3_FOUND) + include_directories (${Python3_INCLUDE_DIRS}) + link_directories (${Python3_LIBRARY_DIRS}) + set (Python_INCLUDE_DIRS ${Python3_INCLUDE_DIRS}) + set (Python_LIBRARIES ${Python3_LIBRARIES}) + set (Python_FOUND true) + endif() +endif() + +if (Python_FOUND) + message (STATUS "Found Python.") + message (STATUS "Include Path: ${Python_INCLUDE_DIRS}") + message (STATUS "Library Path: ${Python_LIBRARIES}") + + if (WIN32) + set(PY_SUFFIX .pyd) + else() + set(PY_SUFFIX .so) + endif() + + execute_process(COMMAND swig -python -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_pwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/PythonCore.i) + execute_process(COMMAND swig -python -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_pwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/PythonClient.i) + + include_directories(${PYTHON_INCLUDE_PATH} ${PROJECT_SOURCE_DIR}/AnnService ${PROJECT_SOURCE_DIR}/Wrappers) + + file(GLOB CORE_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface.h) + file(GLOB CORE_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/CoreInterface.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_pwrap.cpp) + add_library (_SPTAG SHARED ${CORE_SRC_FILES} ${CORE_HDR_FILES}) + set_target_properties(_SPTAG PROPERTIES PREFIX "" SUFFIX ${PY_SUFFIX}) + target_link_libraries(_SPTAG SPTAGLib ${Python_LIBRARIES} ${TBB_LIBRARIES}) + add_custom_command(TARGET _SPTAG POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAG.py ${EXECUTABLE_OUTPUT_PATH}) + + file(GLOB CLIENT_HDR_FILES 
${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Client/*.h) + file(GLOB CLIENT_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/ClientInterface.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Client/*.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_pwrap.cpp) + add_library (_SPTAGClient SHARED ${CLIENT_SRC_FILES} ${CLIENT_HDR_FILES}) + set_target_properties(_SPTAGClient PROPERTIES PREFIX "" SUFFIX ${PY_SUFFIX}) + target_link_libraries(_SPTAGClient SPTAGLib ${Python_LIBRARIES} ${Boost_LIBRARIES} ${TBB_LIBRARIES}) + add_custom_command(TARGET _SPTAGClient POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAGClient.py ${EXECUTABLE_OUTPUT_PATH}) + + install(TARGETS _SPTAG _SPTAGClient + RUNTIME DESTINATION bin + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib) + install(FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAG.py ${PROJECT_SOURCE_DIR}/Wrappers/inc/SPTAGClient.py DESTINATION bin) # SPTAG.py is taken from Wrappers/inc, the same path the POST_BUILD copy uses +else() + message (STATUS "Could not find Python.") +endif() + +find_package(JNI) +if (JNI_FOUND) + include_directories (${JNI_INCLUDE_DIRS}) + link_directories (${JNI_LIBRARY_DIRS}) + message (STATUS "Found JNI.") + message (STATUS "Include Path: ${JNI_INCLUDE_DIRS}") + message (STATUS "Library Path: ${JNI_LIBRARIES}") + + if (WIN32) + set (JAVA_SUFFIX .dll) + else() + set (JAVA_SUFFIX .so) + endif() + + execute_process(COMMAND swig -java -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_jwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/JavaCore.i) + execute_process(COMMAND swig -java -c++ -I${PROJECT_SOURCE_DIR}/Wrappers/inc -o ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_jwrap.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/JavaClient.i) + + include_directories(${JNI_INCLUDE_DIRS} ${PROJECT_SOURCE_DIR}/AnnService ${PROJECT_SOURCE_DIR}/Wrappers) + + file(GLOB CORE_HDR_FILES
${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface.h) + file(GLOB CORE_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/CoreInterface.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/CoreInterface_jwrap.cpp) + add_library (SPTAG SHARED ${CORE_SRC_FILES} ${CORE_HDR_FILES}) + set_target_properties(SPTAG PROPERTIES SUFFIX ${JAVA_SUFFIX}) + target_link_libraries(SPTAG SPTAGLib ${JNI_LIBRARIES} ${TBB_LIBRARIES}) + + file(GLOB CLIENT_HDR_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Socket/*.h ${PROJECT_SOURCE_DIR}/AnnService/inc/Client/*.h) + file(GLOB CLIENT_SRC_FILES ${PROJECT_SOURCE_DIR}/Wrappers/src/ClientInterface.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Socket/*.cpp ${PROJECT_SOURCE_DIR}/AnnService/src/Client/*.cpp ${PROJECT_SOURCE_DIR}/Wrappers/inc/ClientInterface_jwrap.cpp) + add_library (SPTAGClient SHARED ${CLIENT_SRC_FILES} ${CLIENT_HDR_FILES}) + set_target_properties(SPTAGClient PROPERTIES SUFFIX ${JAVA_SUFFIX}) + target_link_libraries(SPTAGClient SPTAGLib ${JNI_LIBRARIES} ${Boost_LIBRARIES} ${TBB_LIBRARIES}) + + file(GLOB JAVA_FILES ${PROJECT_SOURCE_DIR}/Wrappers/inc/*.java) + foreach(JAVA_FILE ${JAVA_FILES}) + message (STATUS "Add copy post-command for file " ${JAVA_FILE}) + add_custom_command(TARGET SPTAGClient POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${JAVA_FILE} ${EXECUTABLE_OUTPUT_PATH}) + endforeach(JAVA_FILE) + + install(TARGETS SPTAG SPTAGClient + RUNTIME DESTINATION bin + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib) + install(FILES ${JAVA_FILES} DESTINATION bin) # install(FILES) takes literal paths, not wildcards; reuse the globbed list +else() + message (STATUS "Could not find JNI.") +endif() \ No newline at end of file diff --git a/Wrappers/JavaClient.vcxproj b/Wrappers/JavaClient.vcxproj new file mode 100644 index 00000000..bc917561 --- /dev/null +++ b/Wrappers/JavaClient.vcxproj @@ -0,0 +1,190 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {8866BF98-AA2E-450F-9F33-083E007CCA74} + JavaClient +
8.1 + + + + + DynamicLibrary + true + v140 + MultiByte + + + DynamicLibrary + false + v140 + true + MultiByte + + + DynamicLibrary + true + v140 + MultiByte + + + DynamicLibrary + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + + + + + SPTAGClient + .dll + $(SolutionDir)obj\$(Platform)_$(Configuration)\$(ProjectName)\ + $(ProjectDir);$(SolutionDir)AnnService\;$(IncludePath) + $(OutLibDir);$(LibraryPath) + $(OutAppDir) + + + false + + + + $(JavaLib);CoreLibrary.lib;SocketLib.lib;%(AdditionalDependencies) + + + + + $(JavaIncDir);%(AdditionalIncludeDirectories) + + + + + Level3 + MaxSpeed + true + true + true + true + _WINDLL;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + + + true + true + + + + + Level3 + Disabled + true + true + + + + + Level3 + Disabled + true + true + _WINDLL;_SCL_SECURE_NO_WARNINGS;SWIG_JAVA_INTERPRETER_NO_DEBUG;%(PreprocessorDefinitions) + Guard + ProgramDatabase + + + /guard:cf %(AdditionalOptions) + + + + + Level3 + MaxSpeed + true + true + true + true + + + true + true + + + + + + + + + + + + + false + false + false + false + + + + + + + + + + + + + + + + + + + + + + + + + This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
+ + + + + + + + \ No newline at end of file diff --git a/Wrappers/JavaClient.vcxproj.filters b/Wrappers/JavaClient.vcxproj.filters new file mode 100644 index 00000000..e5e9baf5 --- /dev/null +++ b/Wrappers/JavaClient.vcxproj.filters @@ -0,0 +1,38 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Header Files + + + Header Files + + + + + Source Files + + + Source Files + + + + + Resource Files + + + \ No newline at end of file diff --git a/Wrappers/JavaCore.vcxproj b/Wrappers/JavaCore.vcxproj new file mode 100644 index 00000000..bf11ef9e --- /dev/null +++ b/Wrappers/JavaCore.vcxproj @@ -0,0 +1,137 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {93FEB26B-965E-4157-8BE5-052F5CA112BB} + JavaCore + 8.1 + + + + + Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + DynamicLibrary + true + v140 + MultiByte + + + DynamicLibrary + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + + + + + SPTAG + .dll + $(SolutionDir)obj\$(Platform)_$(Configuration)\$(ProjectName)\ + $(ProjectDir);$(SolutionDir)AnnService\;$(IncludePath) + $(OutLibDir);$(LibraryPath) + $(OutAppDir) + + + false + + + + $(JavaLib);CoreLibrary.lib;%(AdditionalDependencies) + + + + + _WINDLL;SWIG_JAVA_INTERPRETER_NO_DEBUG;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + $(JavaIncDir);%(AdditionalIncludeDirectories) + Guard + ProgramDatabase + _WINDLL;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + + + /guard:cf %(AdditionalOptions) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. 
For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. + + + + + + \ No newline at end of file diff --git a/Wrappers/JavaCore.vcxproj.filters b/Wrappers/JavaCore.vcxproj.filters new file mode 100644 index 00000000..ae22a6a6 --- /dev/null +++ b/Wrappers/JavaCore.vcxproj.filters @@ -0,0 +1,38 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {ba4289c4-f872-4dbc-a57f-7b415614afb3} + + + + + Header Files + + + Header Files + + + + + Source Files + + + Source Files + + + + + + Resources + + + \ No newline at end of file diff --git a/PythonWrapper/PythonClient.vcxproj b/Wrappers/PythonClient.vcxproj similarity index 97% rename from PythonWrapper/PythonClient.vcxproj rename to Wrappers/PythonClient.vcxproj index fd745df4..a9cad340 100644 --- a/PythonWrapper/PythonClient.vcxproj +++ b/Wrappers/PythonClient.vcxproj @@ -147,10 +147,10 @@ - + - + false false false @@ -169,7 +169,7 @@ - + diff --git a/PythonWrapper/PythonClient.vcxproj.filters b/Wrappers/PythonClient.vcxproj.filters similarity index 92% rename from PythonWrapper/PythonClient.vcxproj.filters rename to Wrappers/PythonClient.vcxproj.filters index 8da7430e..928aef15 100644 --- a/PythonWrapper/PythonClient.vcxproj.filters +++ b/Wrappers/PythonClient.vcxproj.filters @@ -26,12 +26,12 @@ Source Files - + Source Files - + Resource Files diff --git a/PythonWrapper/PythonCore.vcxproj b/Wrappers/PythonCore.vcxproj similarity index 92% rename from PythonWrapper/PythonCore.vcxproj rename to Wrappers/PythonCore.vcxproj index f614d325..2ec11c0a 100644 --- a/PythonWrapper/PythonCore.vcxproj +++ b/Wrappers/PythonCore.vcxproj @@ -103,10 +103,10 @@ - + - + @@ -114,11 +114,11 @@ - + - + @@ -130,6 +130,6 @@ - + \ No newline at end of file diff --git a/PythonWrapper/PythonCore.vcxproj.filters b/Wrappers/PythonCore.vcxproj.filters similarity index 89% rename from 
PythonWrapper/PythonCore.vcxproj.filters rename to Wrappers/PythonCore.vcxproj.filters index dd99d98f..46fb928d 100644 --- a/PythonWrapper/PythonCore.vcxproj.filters +++ b/Wrappers/PythonCore.vcxproj.filters @@ -25,12 +25,13 @@ Source Files - + Source Files - + + Resources diff --git a/Wrappers/PythonCore.vcxproj.user b/Wrappers/PythonCore.vcxproj.user new file mode 100644 index 00000000..abe8dd89 --- /dev/null +++ b/Wrappers/PythonCore.vcxproj.user @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/PythonWrapper/inc/ClientInterface.h b/Wrappers/inc/ClientInterface.h similarity index 72% rename from PythonWrapper/inc/ClientInterface.h rename to Wrappers/inc/ClientInterface.h index 210c9443..94d46bca 100644 --- a/PythonWrapper/inc/ClientInterface.h +++ b/Wrappers/inc/ClientInterface.h @@ -4,38 +4,14 @@ #ifndef _SPTAG_PW_CLIENTINTERFACE_H_ #define _SPTAG_PW_CLIENTINTERFACE_H_ -#ifndef SWIG - #include "TransferDataType.h" -#include "inc/Core/CommonDataStructure.h" #include "inc/Socket/Client.h" -#include "inc/Socket/RemoteSearchQuery.h" #include "inc/Socket/ResourceManager.h" #include #include #include -#else -%module SPTAGClient - -%{ -#include "inc/ClientInterface.h" -%} - -%include -%shared_ptr(AnnClient) - -%include "PyByteArray.i" - -%{ -#define SWIG_FILE_WITH_INIT -%} - -#endif // SWIG - -typedef unsigned int SizeType; - class AnnClient { public: @@ -43,19 +19,19 @@ class AnnClient ~AnnClient(); - void SetTimeoutMilliseconds(SizeType p_timeout); + void SetTimeoutMilliseconds(int p_timeout); void SetSearchParam(const char* p_name, const char* p_value); void ClearSearchParam(); - RemoteSearchResult Search(ByteArray p_data, SizeType p_resultNum, const char* p_valueType, bool p_withMetaData); + std::shared_ptr Search(ByteArray p_data, int p_resultNum, const char* p_valueType, bool p_withMetaData); bool IsConnected() const; private: std::string CreateSearchQuery(const ByteArray& p_data, - SizeType p_resultNum, + int p_resultNum, bool p_extractMetadata, 
SPTAG::VectorValueType p_valueType); diff --git a/PythonWrapper/inc/CoreInterface.h b/Wrappers/inc/CoreInterface.h similarity index 51% rename from PythonWrapper/inc/CoreInterface.h rename to Wrappers/inc/CoreInterface.h index a62fad4c..8da98e3d 100644 --- a/PythonWrapper/inc/CoreInterface.h +++ b/Wrappers/inc/CoreInterface.h @@ -4,39 +4,16 @@ #ifndef _SPTAG_PW_COREINTERFACE_H_ #define _SPTAG_PW_COREINTERFACE_H_ -#ifndef SWIG - #include "TransferDataType.h" #include "inc/Core/Common.h" #include "inc/Core/VectorIndex.h" -#else -%module SPTAG - -%{ -#include "inc/CoreInterface.h" -%} - -%include -%shared_ptr(AnnIndex) -%shared_ptr(QueryResult) -%include "PyByteArray.i" - -%{ -#define SWIG_FILE_WITH_INIT -%} - -#endif // SWIG - - -typedef unsigned int SizeType; - class AnnIndex { public: - AnnIndex(SizeType p_dimension); + AnnIndex(int p_dimension); - AnnIndex(const char* p_algoType, const char* p_valueType, SizeType p_dimension); + AnnIndex(const char* p_algoType, const char* p_valueType, int p_dimension); ~AnnIndex(); @@ -44,23 +21,23 @@ class AnnIndex void SetSearchParam(const char* p_name, const char* p_value); - bool Build(ByteArray p_data, SizeType p_num); + bool Build(ByteArray p_data, int p_num); - bool BuildWithMetaData(ByteArray p_data, ByteArray p_meta, SizeType p_num); + bool BuildWithMetaData(ByteArray p_data, ByteArray p_meta, int p_num); - std::shared_ptr Search(ByteArray p_data, SizeType p_resultNum); + std::shared_ptr Search(ByteArray p_data, int p_resultNum); - std::shared_ptr SearchWithMetaData(ByteArray p_data, SizeType p_resultNum); + std::shared_ptr SearchWithMetaData(ByteArray p_data, int p_resultNum); bool ReadyToServe() const; bool Save(const char* p_saveFile) const; - bool Add(ByteArray p_data, SizeType p_num); + bool Add(ByteArray p_data, int p_num); - bool AddWithMetaData(ByteArray p_data, ByteArray p_meta, SizeType p_num); + bool AddWithMetaData(ByteArray p_data, ByteArray p_meta, int p_num); - bool Delete(ByteArray p_data, SizeType 
p_num); + bool Delete(ByteArray p_data, int p_num); static AnnIndex Load(const char* p_loaderFile); @@ -69,9 +46,9 @@ class AnnIndex std::shared_ptr m_index; - SizeType m_inputVectorSize; + int m_inputVectorSize; - SizeType m_dimension; + int m_dimension; SPTAG::IndexAlgoType m_algoType; diff --git a/Wrappers/inc/JavaClient.i b/Wrappers/inc/JavaClient.i new file mode 100644 index 00000000..ba8d93fc --- /dev/null +++ b/Wrappers/inc/JavaClient.i @@ -0,0 +1,16 @@ +%module SPTAGClient + +%{ +#include "inc/ClientInterface.h" +%} + +%include +%shared_ptr(AnnClient) +%shared_ptr(RemoteSearchResult) +%include "JavaCommon.i" + +%{ +#define SWIG_FILE_WITH_INIT +%} + +%include "ClientInterface.h" diff --git a/Wrappers/inc/JavaCommon.i b/Wrappers/inc/JavaCommon.i new file mode 100644 index 00000000..6d4c1259 --- /dev/null +++ b/Wrappers/inc/JavaCommon.i @@ -0,0 +1,17 @@ +#ifdef SWIGJAVA + +%typemap(jni) ByteArray "jbyteArray" +%typemap(jtype) ByteArray "byte[]" +%typemap(jstype) ByteArray "byte[]" +%typemap(in) ByteArray { + $1.SetData((std::uint8_t*)JCALL2(GetByteArrayElements, jenv, $input, 0)); + $1.SetLength(JCALL1(GetArrayLength, jenv, $input)); +} +%typemap(out) ByteArray { + $result = JCALL1(NewByteArray, jenv, $1.Length()); + JCALL4(SetByteArrayRegion, jenv, $result, 0, $1.Length(), (jbyte *)$1.Data()); +} +%typemap(javain) ByteArray "$javainput" +%typemap(javaout) ByteArray { return $jnicall; } + +#endif diff --git a/Wrappers/inc/JavaCore.i b/Wrappers/inc/JavaCore.i new file mode 100644 index 00000000..e52308af --- /dev/null +++ b/Wrappers/inc/JavaCore.i @@ -0,0 +1,16 @@ +%module SPTAG + +%{ +#include "inc/CoreInterface.h" +%} + +%include +%shared_ptr(AnnIndex) +%shared_ptr(QueryResult) +%include "JavaCommon.i" + +%{ +#define SWIG_FILE_WITH_INIT +%} + +%include "CoreInterface.h" \ No newline at end of file diff --git a/Wrappers/inc/PythonClient.i b/Wrappers/inc/PythonClient.i new file mode 100644 index 00000000..a70e2fde --- /dev/null +++ b/Wrappers/inc/PythonClient.i 
@@ -0,0 +1,16 @@ +%module SPTAGClient + +%{ +#include "inc/ClientInterface.h" +%} + +%include +%shared_ptr(AnnClient) +%shared_ptr(RemoteSearchResult) +%include "PythonCommon.i" + +%{ +#define SWIG_FILE_WITH_INIT +%} + +%include "ClientInterface.h" \ No newline at end of file diff --git a/PythonWrapper/inc/PyByteArray.i b/Wrappers/inc/PythonCommon.i similarity index 95% rename from PythonWrapper/inc/PyByteArray.i rename to Wrappers/inc/PythonCommon.i index 7c3b19b8..7fb9380e 100644 --- a/PythonWrapper/inc/PyByteArray.i +++ b/Wrappers/inc/PythonCommon.i @@ -32,14 +32,14 @@ } %} -%typemap(out) RemoteSearchResult +%typemap(out) std::shared_ptr %{ { $result = PyTuple_New(3); auto dstVecIDs = PyList_New(0); auto dstVecDists = PyList_New(0); auto dstMetadata = PyList_New(0); - for (const auto& indexRes : $1.m_allIndexResults) + for (const auto& indexRes : $1->m_allIndexResults) { for (const auto& res : indexRes.m_results) { diff --git a/Wrappers/inc/PythonCore.i b/Wrappers/inc/PythonCore.i new file mode 100644 index 00000000..d2f38ca8 --- /dev/null +++ b/Wrappers/inc/PythonCore.i @@ -0,0 +1,16 @@ +%module SPTAG + +%{ +#include "inc/CoreInterface.h" +%} + +%include +%shared_ptr(AnnIndex) +%shared_ptr(QueryResult) +%include "PythonCommon.i" + +%{ +#define SWIG_FILE_WITH_INIT +%} + +%include "CoreInterface.h" \ No newline at end of file diff --git a/PythonWrapper/inc/TransferDataType.h b/Wrappers/inc/TransferDataType.h similarity index 92% rename from PythonWrapper/inc/TransferDataType.h rename to Wrappers/inc/TransferDataType.h index 51ef9614..6c2a9b37 100644 --- a/PythonWrapper/inc/TransferDataType.h +++ b/Wrappers/inc/TransferDataType.h @@ -12,8 +12,6 @@ typedef SPTAG::ByteArray ByteArray; typedef SPTAG::QueryResult QueryResult; -typedef SPTAG::BasicResult BasicResult; - typedef SPTAG::Socket::RemoteSearchResult RemoteSearchResult; #endif // _SPTAG_PW_TRANSFERDATATYPE_H_ diff --git a/PythonWrapper/packages.config b/Wrappers/packages.config similarity index 100% rename 
from PythonWrapper/packages.config rename to Wrappers/packages.config diff --git a/PythonWrapper/src/ClientInterface.cpp b/Wrappers/src/ClientInterface.cpp similarity index 95% rename from PythonWrapper/src/ClientInterface.cpp rename to Wrappers/src/ClientInterface.cpp index eeff8f45..65a1d4cf 100644 --- a/PythonWrapper/src/ClientInterface.cpp +++ b/Wrappers/src/ClientInterface.cpp @@ -64,7 +64,7 @@ AnnClient::~AnnClient() void -AnnClient::SetTimeoutMilliseconds(SizeType p_timeout) +AnnClient::SetTimeoutMilliseconds(int p_timeout) { m_timeoutInMilliseconds = p_timeout; } @@ -101,8 +101,8 @@ AnnClient::ClearSearchParam() } -RemoteSearchResult -AnnClient::Search(ByteArray p_data, SizeType p_resultNum, const char* p_valueType, bool p_withMetaData) +std::shared_ptr +AnnClient::Search(ByteArray p_data, int p_resultNum, const char* p_valueType, bool p_withMetaData) { using namespace SPTAG; @@ -166,7 +166,7 @@ AnnClient::Search(ByteArray p_data, SizeType p_resultNum, const char* p_valueTyp signal->Wait(); } - return std::move(ret); + return std::make_shared(ret); } @@ -223,7 +223,7 @@ AnnClient::SearchResponseHanlder(SPTAG::Socket::ConnectionID p_localConnectionID std::string AnnClient::CreateSearchQuery(const ByteArray& p_data, - SizeType p_resultNum, + int p_resultNum, bool p_extractMetadata, SPTAG::VectorValueType p_valueType) { diff --git a/PythonWrapper/src/CoreInterface.cpp b/Wrappers/src/CoreInterface.cpp similarity index 89% rename from PythonWrapper/src/CoreInterface.cpp rename to Wrappers/src/CoreInterface.cpp index 1587e9ef..99d545bc 100644 --- a/PythonWrapper/src/CoreInterface.cpp +++ b/Wrappers/src/CoreInterface.cpp @@ -5,7 +5,7 @@ #include "inc/Helper/StringConvert.h" -AnnIndex::AnnIndex(SizeType p_dimension) +AnnIndex::AnnIndex(int p_dimension) : m_algoType(SPTAG::IndexAlgoType::BKT), m_inputValueType(SPTAG::VectorValueType::Float), m_dimension(p_dimension) @@ -14,7 +14,7 @@ AnnIndex::AnnIndex(SizeType p_dimension) } -AnnIndex::AnnIndex(const char* 
p_algoType, const char* p_valueType, SizeType p_dimension) +AnnIndex::AnnIndex(const char* p_algoType, const char* p_valueType, int p_dimension) : m_algoType(SPTAG::IndexAlgoType::Undefined), m_inputValueType(SPTAG::VectorValueType::Undefined), m_dimension(p_dimension) @@ -41,7 +41,7 @@ AnnIndex::~AnnIndex() bool -AnnIndex::Build(ByteArray p_data, SizeType p_num) +AnnIndex::Build(ByteArray p_data, int p_num) { if (nullptr == m_index) { @@ -56,7 +56,7 @@ AnnIndex::Build(ByteArray p_data, SizeType p_num) bool -AnnIndex::BuildWithMetaData(ByteArray p_data, ByteArray p_meta, SizeType p_num) +AnnIndex::BuildWithMetaData(ByteArray p_data, ByteArray p_meta, int p_num) { if (nullptr == m_index) { @@ -69,8 +69,8 @@ AnnIndex::BuildWithMetaData(ByteArray p_data, ByteArray p_meta, SizeType p_num) std::shared_ptr vectors(new SPTAG::BasicVectorSet(p_data, m_inputValueType, - static_cast(m_dimension), - static_cast(p_num))); + static_cast(m_dimension), + static_cast(p_num))); std::uint64_t* offsets = new std::uint64_t[p_num + 1]{ 0 }; int current = 1; @@ -108,7 +108,7 @@ AnnIndex::SetSearchParam(const char* p_name, const char* p_value) std::shared_ptr -AnnIndex::Search(ByteArray p_data, SizeType p_resultNum) +AnnIndex::Search(ByteArray p_data, int p_resultNum) { std::shared_ptr results = std::make_shared(p_data.Data(), p_resultNum, false); @@ -120,7 +120,7 @@ AnnIndex::Search(ByteArray p_data, SizeType p_resultNum) } std::shared_ptr -AnnIndex::SearchWithMetaData(ByteArray p_data, SizeType p_resultNum) +AnnIndex::SearchWithMetaData(ByteArray p_data, int p_resultNum) { std::shared_ptr results = std::make_shared(p_data.Data(), p_resultNum, true); @@ -160,7 +160,7 @@ AnnIndex::Load(const char* p_loaderFile) bool -AnnIndex::Add(ByteArray p_data, SizeType p_num) +AnnIndex::Add(ByteArray p_data, int p_num) { if (nullptr == m_index) { @@ -175,7 +175,7 @@ AnnIndex::Add(ByteArray p_data, SizeType p_num) bool -AnnIndex::AddWithMetaData(ByteArray p_data, ByteArray p_meta, SizeType p_num) 
+AnnIndex::AddWithMetaData(ByteArray p_data, ByteArray p_meta, int p_num) { if (nullptr == m_index) { @@ -188,8 +188,8 @@ AnnIndex::AddWithMetaData(ByteArray p_data, ByteArray p_meta, SizeType p_num) std::shared_ptr vectors(new SPTAG::BasicVectorSet(p_data, m_inputValueType, - static_cast(m_dimension), - static_cast(p_num))); + static_cast(m_dimension), + static_cast(p_num))); std::uint64_t* offsets = new std::uint64_t[p_num + 1]{ 0 }; int current = 1; @@ -203,7 +203,7 @@ AnnIndex::AddWithMetaData(ByteArray p_data, ByteArray p_meta, SizeType p_num) bool -AnnIndex::Delete(ByteArray p_data, SizeType p_num) +AnnIndex::Delete(ByteArray p_data, int p_num) { if (nullptr != m_index && p_num > 0) { From 12ce53b2f495e33958ef24e9f5bfa29b348a2655 Mon Sep 17 00:00:00 2001 From: cheqi Date: Thu, 21 Feb 2019 15:25:43 +0800 Subject: [PATCH 09/14] Update docs --- README.md | 1 + docs/GettingStart.md | 23 ++++++++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index c066dd29..9e3591f3 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ The searches in the trees and the graph are iteratively conducted. * swig >= 3.0 * cmake >= 3.12.0 * boost >= 1.67.0 +* tbb >= 4.2 ### **Install** diff --git a/docs/GettingStart.md b/docs/GettingStart.md index 2f1e868f..540905eb 100644 --- a/docs/GettingStart.md +++ b/docs/GettingStart.md @@ -9,7 +9,7 @@ -v, --vectortype Input vector data type (e.g. Float, Int8, Int16), required. -i, --input Input raw data, required. -o, --outputfolder Output folder, required. - -a, --algo Index Algorithm type, required. + -a, --algo Index Algorithm type (e.g. BKT, KDT), required. -t, --thread Thread Number, default is 32. --delimiter Vector delimiter, default is |. 
@@ -19,7 +19,7 @@ ### **Index Search** ```bash Usage: - ./Search [options] + ./IndexSearcher [options] Options Index.QueryFile=XXX Input Query file Index.ResultFile=XXX Output result file @@ -28,6 +28,23 @@ Index.MaxCheck=XXX The maxcheck of the search ``` +### **Input File Format** +> Input raw data for index build and input query file for index search (suppose vector dimension is 3): +``` +<metadata1>\t<v11>|<v12>|<v13>| +<metadata2>\t<v21>|<v22>|<v23>| +... +``` +where each line holds a vector's metadata and its values, separated by a tab. The dimensions of a vector are separated by | (use --delimiter to choose a different separator). + +> Truth file to calculate recall (suppose K is 2): +``` +<t11> <t12> +<t21> <t22> +... +``` +where each line lists the K nearest neighbors of a query, separated by a blank space. Each neighbor is given by its vector id. + ### **Server** ```bash Usage: @@ -190,7 +207,7 @@ if __name__ == '__main__': ``` - > Python Client Wrapper, Surpose there is a sever run at 127.0.0.1:8000 serving nytimes datasets: + > Python Client Wrapper, suppose there is a server running at 127.0.0.1:8000 serving ten-dimensional vector datasets: ```python import SPTAGClient import numpy as np From 48759471d8da435bc977f49758d9cb7cf4d18f0a Mon Sep 17 00:00:00 2001 From: cheqi Date: Tue, 26 Feb 2019 12:51:25 +0800 Subject: [PATCH 10/14] fix search result --- Wrappers/inc/JavaCommon.i | 43 +++++++++++++++++++++++++++++++++ Wrappers/inc/JavaCore.i | 3 ++- Wrappers/inc/TransferDataType.h | 9 +++++++ 3 files changed, 54 insertions(+), 1 deletion(-) diff --git a/Wrappers/inc/JavaCommon.i b/Wrappers/inc/JavaCommon.i index 6d4c1259..654bcdcf 100644 --- a/Wrappers/inc/JavaCommon.i +++ b/Wrappers/inc/JavaCommon.i @@ -14,4 +14,47 @@ %typemap(javain) ByteArray "$javainput" %typemap(javaout) ByteArray { return $jnicall; } +%typemap(jni) std::shared_ptr "jobjectArray" +%typemap(jtype) std::shared_ptr "Result[]" +%typemap(jstype) std::shared_ptr "Result[]" +%typemap(out) std::shared_ptr { + jclass retClass = jenv->FindClass("Result"); +
int len = $1->GetResultNum(); + $result = jenv->NewObjectArray(len, retClass, NULL); + for (int i = 0; i < len; i++) { + auto& meta = $1->GetMetadata(i); + jbyteArray bptr = jenv->NewByteArray(meta.Length()); + jenv->SetByteArrayRegion(bptr, 0, meta.Length(), (jbyte *)meta.Data()); + jenv->SetObjectArrayElement(jresult, i, jenv->NewObject(retClass, jenv->GetMethodID(retClass, "", "(IF[B)V"), (jint)($1->GetResult(i)->VID), (jfloat)($1->GetResult(i)->Dist), bptr)); + } +} +%typemap(javaout) std::shared_ptr { return $jnicall; } + +%typemap(jni) std::shared_ptr "jobjectArray" +%typemap(jtype) std::shared_ptr "Result[]" +%typemap(jstype) std::shared_ptr "Result[]" +%typemap(out) std::shared_ptr { + int combinelen = 0; + int nodelen = (int)(($1->m_allIndexResults).size()); + for (int i = 0; i < nodelen; i++) { + combinelen += $1->m_allIndexResults[i].m_results.GetResultNum(); + } + jclass retClass = jenv->FindClass("Result"); + $result = jenv->NewObjectArray(combinelen, retClass, NULL); + int id = 0; + for (int i = 0; i < nodelen; i++) { + for (int j = 0; j < $1->m_allIndexResults[i].m_results.GetResultNum(); j++) { + auto& ptr = $1->m_allIndexResults[i].m_results; + auto& meta = ptr.GetMetadata(j); + jbyteArray bptr = jenv->NewByteArray(meta.Length()); + jenv->SetByteArrayRegion(bptr, 0, meta.Length(), (jbyte *)meta.Data()); + jenv->SetObjectArrayElement(jresult, id, jenv->NewObject(retClass, jenv->GetMethodID(retClass, "", "(IF[B)V"), (jint)(ptr.GetResult(j)->VID), (jfloat)(ptr.GetResult(j)->Dist), bptr)); + id++; + } + } +} +%typemap(javaout) std::shared_ptr { + return $jnicall; +} + #endif diff --git a/Wrappers/inc/JavaCore.i b/Wrappers/inc/JavaCore.i index e52308af..125ce7e5 100644 --- a/Wrappers/inc/JavaCore.i +++ b/Wrappers/inc/JavaCore.i @@ -13,4 +13,5 @@ #define SWIG_FILE_WITH_INIT %} -%include "CoreInterface.h" \ No newline at end of file +%include "CoreInterface.h" +%include "TransferDataType.h" diff --git a/Wrappers/inc/TransferDataType.h 
b/Wrappers/inc/TransferDataType.h index 6c2a9b37..e9eb64de 100644 --- a/Wrappers/inc/TransferDataType.h +++ b/Wrappers/inc/TransferDataType.h @@ -14,4 +14,13 @@ typedef SPTAG::QueryResult QueryResult; typedef SPTAG::Socket::RemoteSearchResult RemoteSearchResult; +class Result { /* Plain holder for one search hit: vector id, distance and metadata bytes. */ +public: + int VID; + float Dist; + ByteArray Meta; + + Result(int _VID, float _Dist, ByteArray _Meta): VID(_VID), Dist(_Dist), Meta(_Meta) {} +}; + #endif // _SPTAG_PW_TRANSFERDATATYPE_H_ From 75dd2764353169777ec59304ba6833fc7810fc48 Mon Sep 17 00:00:00 2001 From: cheqi Date: Thu, 7 Mar 2019 15:47:36 +0800 Subject: [PATCH 11/14] change IndexBuilder to support binary input data --- AnnService/src/IndexBuilder/main.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/AnnService/src/IndexBuilder/main.cpp b/AnnService/src/IndexBuilder/main.cpp index 1f83b818..ba4de461 100644 --- a/AnnService/src/IndexBuilder/main.cpp +++ b/AnnService/src/IndexBuilder/main.cpp @@ -57,8 +57,8 @@ int main(int argc, char* argv[]) ErrorCode code; if (options->m_inputFiles.find("BIN:") == 0) { - options->m_inputFiles = options->m_inputFiles.substr(4); - std::ifstream inputStream(options->m_inputFiles, std::ifstream::binary); + std::vector<std::string> files = SPTAG::Helper::StrUtils::SplitString(options->m_inputFiles.substr(4), ","); /* files[0] is the binary vector file; files[1] and files[2], when present, feed FileMetadataSet below */ + std::ifstream inputStream(files.empty() ? std::string() : files[0], std::ifstream::binary); /* if the split yields no entries, open an empty path so the failure branch below runs instead of indexing an empty vector */ if (!inputStream.is_open()) { fprintf(stderr, "Failed to read input file.\n"); exit(1); @@ -71,9 +71,13 @@ int main(int argc, char* argv[]) char* vecBuf = reinterpret_cast<char*>(vectorSet.Data()); inputStream.read(vecBuf, totalRecordVectorBytes); inputStream.close(); - std::shared_ptr<VectorSet> p_vectorSet(new BasicVectorSet(vectorSet, options->m_inputValueType, col, row)); - code = indexBuilder->BuildIndex(p_vectorSet, nullptr); + + std::shared_ptr<MetadataSet> p_metaSet = nullptr; + if (files.size() >= 3) { + p_metaSet.reset(new FileMetadataSet(files[1], files[2])); + } + code = indexBuilder->BuildIndex(p_vectorSet, p_metaSet);
indexBuilder->SaveIndex(options->m_outputFolder); } else { From ac7d09a03e9a2749e8e4586a1f9a3b41f732fa72 Mon Sep 17 00:00:00 2001 From: cheqi Date: Thu, 7 Mar 2019 15:57:57 +0800 Subject: [PATCH 12/14] temp remove java related projects --- SPTAG.sln | 2 -- 1 file changed, 2 deletions(-) diff --git a/SPTAG.sln b/SPTAG.sln index 09c1564f..e65ed07e 100644 --- a/SPTAG.sln +++ b/SPTAG.sln @@ -159,7 +159,6 @@ Global {93FEB26B-965E-4157-8BE5-052F5CA112BB}.Debug|x86.ActiveCfg = Debug|Win32 {93FEB26B-965E-4157-8BE5-052F5CA112BB}.Debug|x86.Build.0 = Debug|Win32 {93FEB26B-965E-4157-8BE5-052F5CA112BB}.Release|x64.ActiveCfg = Release|x64 - {93FEB26B-965E-4157-8BE5-052F5CA112BB}.Release|x64.Build.0 = Release|x64 {93FEB26B-965E-4157-8BE5-052F5CA112BB}.Release|x86.ActiveCfg = Release|Win32 {93FEB26B-965E-4157-8BE5-052F5CA112BB}.Release|x86.Build.0 = Release|Win32 {8866BF98-AA2E-450F-9F33-083E007CCA74}.Debug|x64.ActiveCfg = Debug|x64 @@ -167,7 +166,6 @@ Global {8866BF98-AA2E-450F-9F33-083E007CCA74}.Debug|x86.ActiveCfg = Debug|Win32 {8866BF98-AA2E-450F-9F33-083E007CCA74}.Debug|x86.Build.0 = Debug|Win32 {8866BF98-AA2E-450F-9F33-083E007CCA74}.Release|x64.ActiveCfg = Release|x64 - {8866BF98-AA2E-450F-9F33-083E007CCA74}.Release|x64.Build.0 = Release|x64 {8866BF98-AA2E-450F-9F33-083E007CCA74}.Release|x86.ActiveCfg = Release|Win32 {8866BF98-AA2E-450F-9F33-083E007CCA74}.Release|x86.Build.0 = Release|Win32 EndGlobalSection From 8aca8d0ac2a752c2905d9dace1a5df3062a7c758 Mon Sep 17 00:00:00 2001 From: cheqi Date: Thu, 14 Mar 2019 15:28:19 +0800 Subject: [PATCH 13/14] remove javaclient and javacore from the windows build --- SPTAG.sln | 6 ------ 1 file changed, 6 deletions(-) diff --git a/SPTAG.sln b/SPTAG.sln index e65ed07e..77adfe2b 100644 --- a/SPTAG.sln +++ b/SPTAG.sln @@ -155,19 +155,13 @@ Global {29A25655-CCF2-47F8-8BC8-DFE1B5CF993C}.Release|x86.ActiveCfg = Release|Win32 {29A25655-CCF2-47F8-8BC8-DFE1B5CF993C}.Release|x86.Build.0 = Release|Win32 
{93FEB26B-965E-4157-8BE5-052F5CA112BB}.Debug|x64.ActiveCfg = Debug|x64 - {93FEB26B-965E-4157-8BE5-052F5CA112BB}.Debug|x64.Build.0 = Debug|x64 {93FEB26B-965E-4157-8BE5-052F5CA112BB}.Debug|x86.ActiveCfg = Debug|Win32 - {93FEB26B-965E-4157-8BE5-052F5CA112BB}.Debug|x86.Build.0 = Debug|Win32 {93FEB26B-965E-4157-8BE5-052F5CA112BB}.Release|x64.ActiveCfg = Release|x64 {93FEB26B-965E-4157-8BE5-052F5CA112BB}.Release|x86.ActiveCfg = Release|Win32 - {93FEB26B-965E-4157-8BE5-052F5CA112BB}.Release|x86.Build.0 = Release|Win32 {8866BF98-AA2E-450F-9F33-083E007CCA74}.Debug|x64.ActiveCfg = Debug|x64 - {8866BF98-AA2E-450F-9F33-083E007CCA74}.Debug|x64.Build.0 = Debug|x64 {8866BF98-AA2E-450F-9F33-083E007CCA74}.Debug|x86.ActiveCfg = Debug|Win32 - {8866BF98-AA2E-450F-9F33-083E007CCA74}.Debug|x86.Build.0 = Debug|Win32 {8866BF98-AA2E-450F-9F33-083E007CCA74}.Release|x64.ActiveCfg = Release|x64 {8866BF98-AA2E-450F-9F33-083E007CCA74}.Release|x86.ActiveCfg = Release|Win32 - {8866BF98-AA2E-450F-9F33-083E007CCA74}.Release|x86.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE From 322ddc6699aefeedcb8a32bbe50aee00bd2b74e4 Mon Sep 17 00:00:00 2001 From: cheqi Date: Tue, 7 May 2019 13:30:42 +0800 Subject: [PATCH 14/14] Fix SetData issue --- AnnService/inc/Core/BKT/ParameterDefinitionList.h | 4 ++-- AnnService/inc/Core/Common/NeighborhoodGraph.h | 2 ++ AnnService/inc/Core/CommonDataStructure.h | 8 +++----- AnnService/inc/Core/KDT/ParameterDefinitionList.h | 4 ++-- AnnService/src/Core/CommonDataStructure.cpp | 14 ++++---------- Wrappers/inc/JavaCommon.i | 4 ++-- 6 files changed, 15 insertions(+), 21 deletions(-) diff --git a/AnnService/inc/Core/BKT/ParameterDefinitionList.h b/AnnService/inc/Core/BKT/ParameterDefinitionList.h index 716c9b81..7ad17fcb 100644 --- a/AnnService/inc/Core/BKT/ParameterDefinitionList.h +++ b/AnnService/inc/Core/BKT/ParameterDefinitionList.h @@ -19,8 +19,8 @@ DefineBKTParameter(m_pGraph.m_iTPTLeafSize, int, 2000L,
"TPTLeafSize") DefineBKTParameter(m_pGraph.m_numTopDimensionTPTSplit, int, 5L, "NumTopDimensionTpTreeSplit") DefineBKTParameter(m_pGraph.m_iNeighborhoodSize, int, 32L, "NeighborhoodSize") -DefineBKTParameter(m_pGraph.m_iNeighborhoodScale, int, 16L, "GraphNeighborhoodScale") -DefineBKTParameter(m_pGraph.m_iCEFScale, int, 4L, "GraphCEFScale") +DefineBKTParameter(m_pGraph.m_iNeighborhoodScale, int, 2L, "GraphNeighborhoodScale") +DefineBKTParameter(m_pGraph.m_iCEFScale, int, 2L, "GraphCEFScale") DefineBKTParameter(m_pGraph.m_iRefineIter, int, 0L, "RefineIterations") DefineBKTParameter(m_pGraph.m_iCEF, int, 1000L, "CEF") DefineBKTParameter(m_pGraph.m_iMaxCheckForRefineGraph, int, 10000L, "MaxCheckForRefineGraph") diff --git a/AnnService/inc/Core/Common/NeighborhoodGraph.h b/AnnService/inc/Core/Common/NeighborhoodGraph.h index 3da45a1c..55c78cd8 100644 --- a/AnnService/inc/Core/Common/NeighborhoodGraph.h +++ b/AnnService/inc/Core/Common/NeighborhoodGraph.h @@ -121,6 +121,7 @@ namespace SPTAG for (int i = 0; i < m_iGraphSize; i++) { RefineNode(index, i, false); + if (i % 1000 == 0) std::cout << "\rRefine 1 " << (i * 100LL / m_iGraphSize) << "%"; /* 100LL forces a 64-bit multiply: the int product i * 100 overflows once i passes about 21.4 million */ } std::cout << "Refine RNG, graph acc:" << GraphAccuracyEstimation(index, 100, idmap) << std::endl; @@ -132,6 +133,7 @@ namespace SPTAG for (int i = 0; i < m_iGraphSize; i++) { RefineNode(index, i, false); + if (i % 1000 == 0) std::cout << "\rRefine 2 " << (i * 100LL / m_iGraphSize) << "%"; /* 64-bit multiply, same reason as the first refine pass */ } std::cout << "Refine RNG, graph acc:" << GraphAccuracyEstimation(index, 100, idmap) << std::endl; diff --git a/AnnService/inc/Core/CommonDataStructure.h b/AnnService/inc/Core/CommonDataStructure.h index 9fe2d720..74d7980f 100644 --- a/AnnService/inc/Core/CommonDataStructure.h +++ b/AnnService/inc/Core/CommonDataStructure.h @@ -32,11 +32,9 @@ class ByteArray std::uint8_t* Data() const; - void SetData(std::uint8_t* p_array); - - std::size_t Length() const; - - void SetLength(std::size_t p_length); + std::size_t Length() const; + + void
SetData(std::uint8_t* p_array, std::size_t p_length); std::shared_ptr DataHolder() const; diff --git a/AnnService/inc/Core/KDT/ParameterDefinitionList.h b/AnnService/inc/Core/KDT/ParameterDefinitionList.h index 62dbc3a1..70d70ccb 100644 --- a/AnnService/inc/Core/KDT/ParameterDefinitionList.h +++ b/AnnService/inc/Core/KDT/ParameterDefinitionList.h @@ -17,8 +17,8 @@ DefineKDTParameter(m_pGraph.m_iTPTLeafSize, int, 2000L, "TPTLeafSize") DefineKDTParameter(m_pGraph.m_numTopDimensionTPTSplit, int, 5L, "NumTopDimensionTPTSplit") DefineKDTParameter(m_pGraph.m_iNeighborhoodSize, int, 32L, "NeighborhoodSize") -DefineKDTParameter(m_pGraph.m_iNeighborhoodScale, int, 16L, "GraphNeighborhoodScale") -DefineKDTParameter(m_pGraph.m_iCEFScale, int, 4L, "GraphCEFScale") +DefineKDTParameter(m_pGraph.m_iNeighborhoodScale, int, 2L, "GraphNeighborhoodScale") +DefineKDTParameter(m_pGraph.m_iCEFScale, int, 2L, "GraphCEFScale") DefineKDTParameter(m_pGraph.m_iRefineIter, int, 0L, "RefineIterations") DefineKDTParameter(m_pGraph.m_iCEF, int, 1000L, "CEF") DefineKDTParameter(m_pGraph.m_iMaxCheckForRefineGraph, int, 10000L, "MaxCheckForRefineGraph") diff --git a/AnnService/src/Core/CommonDataStructure.cpp b/AnnService/src/Core/CommonDataStructure.cpp index 8e872b0c..4a91554d 100644 --- a/AnnService/src/Core/CommonDataStructure.cpp +++ b/AnnService/src/Core/CommonDataStructure.cpp @@ -101,24 +101,18 @@ ByteArray::Data() const } -void -ByteArray::SetData(std::uint8_t* p_array) -{ - m_data = p_array; -} - - std::size_t ByteArray::Length() const { - return m_length; + return m_length; } void -ByteArray::SetLength(std::size_t p_length) +ByteArray::SetData(std::uint8_t* p_array, std::size_t p_length) { - m_data = p_array; + m_length = p_length; /* pointer and length are replaced together in one call; NOTE(review): only m_data and m_length are assigned here, confirm what the holder returned by DataHolder() should refer to afterwards */ } diff --git a/Wrappers/inc/JavaCommon.i b/Wrappers/inc/JavaCommon.i index 654bcdcf..8d36675d 100644 --- a/Wrappers/inc/JavaCommon.i +++ b/Wrappers/inc/JavaCommon.i @@ -4,8 +4,8 @@ %typemap(jtype) ByteArray "byte[]" %typemap(jstype)
ByteArray "byte[]" %typemap(in) ByteArray { - $1.SetData((std::uint8_t*)JCALL2(GetByteArrayElements, jenv, $input, 0)); - $1.SetLength(JCALL1(GetArrayLength, jenv, $input)); + $1.SetData((std::uint8_t*)JCALL2(GetByteArrayElements, jenv, $input, 0), + JCALL1(GetArrayLength, jenv, $input)); /* NOTE(review): no ReleaseByteArrayElements matching this GetByteArrayElements is visible in this hunk; confirm the elements are released elsewhere (for example in a freearg typemap) */ } %typemap(out) ByteArray { $result = JCALL1(NewByteArray, jenv, $1.Length());