Skip to content

Commit

Permalink
#2689 Construct Knowhere Index Without Data (#2704)
Browse files Browse the repository at this point in the history
* Offset for IVFFlat

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* modify segment reader

Signed-off-by: shengjun.li <shengjun.li@zilliz.com>

* add index_flat_nm

Signed-off-by: shengjun.li <shengjun.li@zilliz.com>

* fix compilation issue

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* fix segment reader

Signed-off-by: shengjun.li <shengjun.li@zilliz.com>

* Fix issue

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* fix hnsw_nm

Signed-off-by: shengjun.li <shengjun.li@zilliz.com>

* fix nsg

Signed-off-by: shengjun.li <shengjun.li@zilliz.com>

* fix bug

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* NSG

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* Fix NSG issue

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* client test

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* fix index size

Signed-off-by: shengjun.li <shengjun.li@zilliz.com>

* remove unnecessary things

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* add changelog

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* Fix clang format

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* add changelog

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* Fix compile error

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* Fix compile error

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* Fix compile error

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* Fix compile error

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* Fix issues

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* Change data to shared_ptr

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* fix hnsw and nsg ut

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* clang-format

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

* not to insert cache immediately

Signed-off-by: shengjun.li <shengjun.li@zilliz.com>

* Fix macro issue

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>

Co-authored-by: shengjun.li <shengjun.li@zilliz.com>
  • Loading branch information
Xiaohai Xu and shengjun.li committed Jul 3, 2020
1 parent f0abfa2 commit 84c7701
Show file tree
Hide file tree
Showing 62 changed files with 4,255 additions and 394 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Please mark all changes in change log and use the issue from GitHub
- \#2509 Count up query statistics for debug ease
- \#2572 Support structured data index
- \#2585 Support IVF_PQ on GPU with using metric_type IP
- \#2689 Construct Knowhere Index Without Data

## Improvement
- \#2543 Remove secondary_path related code
Expand Down
5 changes: 5 additions & 0 deletions core/src/codecs/VectorIndexFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ class VectorIndexFormat {
virtual void
write(const storage::FSHandlerPtr& fs_ptr, const std::string& location,
const segment::VectorIndexPtr& vector_index) = 0;

// Overload of read() that additionally receives the raw vector data as a knowhere::BinaryPtr.
// The base-class implementation is intentionally empty: it ignores all arguments and leaves
// vector_index untouched. Formats that support loading with raw data override it.
virtual void
read(const storage::FSHandlerPtr& fs_ptr, const std::string& location, knowhere::BinaryPtr raw_data,
segment::VectorIndexPtr& vector_index) {
}
};

using VectorIndexFormatPtr = std::shared_ptr<VectorIndexFormat>;
Expand Down
4 changes: 4 additions & 0 deletions core/src/codecs/VectorsFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <memory>
#include <vector>

#include "index/knowhere/knowhere/common/BinarySet.h"
#include "segment/Vectors.h"
#include "storage/FSHandler.h"

Expand All @@ -37,6 +38,9 @@ class VectorsFormat {
virtual void
read_uids(const storage::FSHandlerPtr& fs_ptr, std::vector<segment::doc_id_t>& uids) = 0;

virtual void
read_vectors(const storage::FSHandlerPtr& fs_ptr, knowhere::BinaryPtr& raw_vectors) = 0;

virtual void
read_vectors(const storage::FSHandlerPtr& fs_ptr, off_t offset, size_t num_bytes,
std::vector<uint8_t>& raw_vectors) = 0;
Expand Down
25 changes: 24 additions & 1 deletion core/src/codecs/default/DefaultVectorIndexFormat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ namespace milvus {
namespace codec {

knowhere::VecIndexPtr
DefaultVectorIndexFormat::read_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& path) {
DefaultVectorIndexFormat::read_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& path,
knowhere::BinaryPtr raw_data) {
milvus::TimeRecorder recorder("read_index");
knowhere::BinarySet load_data_list;

Expand Down Expand Up @@ -91,6 +92,12 @@ DefaultVectorIndexFormat::read_internal(const storage::FSHandlerPtr& fs_ptr, con
auto index =
vec_index_factory.CreateVecIndex(knowhere::OldIndexTypeToStr(current_type), knowhere::IndexMode::MODE_CPU);
if (index != nullptr) {
if (raw_data != nullptr) {
LOG_ENGINE_DEBUG_ << "load index with row data " << raw_data->size;
load_data_list.Append(RAW_DATA, raw_data);
length += raw_data->size;
}

index->Load(load_data_list);
index->SetIndexSize(length);
} else {
Expand All @@ -116,6 +123,22 @@ DefaultVectorIndexFormat::read(const storage::FSHandlerPtr& fs_ptr, const std::s
vector_index->SetVectorIndex(index);
}

// Loads the vector index stored at `location` and installs it into `vector_index`.
//
// `raw_data` is forwarded to read_internal(), which receives it alongside the index file contents.
// The whole call is serialized on mutex_.
//
// Throws Exception(SERVER_INVALID_ARGUMENT) when the directory reported by
// fs_ptr->operation_ptr_ is not an existing directory.
void
DefaultVectorIndexFormat::read(const storage::FSHandlerPtr& fs_ptr, const std::string& location,
                               knowhere::BinaryPtr raw_data, segment::VectorIndexPtr& vector_index) {
    const std::lock_guard<std::mutex> lock(mutex_);

    std::string dir_path = fs_ptr->operation_ptr_->GetDirectory();
    if (!boost::filesystem::is_directory(dir_path)) {
        // Keep a space between the path and the trailing text so the message stays readable.
        std::string err_msg = "Directory: " + dir_path + " does not exist";
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_INVALID_ARGUMENT, err_msg);
    }

    knowhere::VecIndexPtr index = read_internal(fs_ptr, location, raw_data);
    vector_index->SetVectorIndex(index);
}

void
DefaultVectorIndexFormat::write(const storage::FSHandlerPtr& fs_ptr, const std::string& location,
const segment::VectorIndexPtr& vector_index) {
Expand Down
6 changes: 5 additions & 1 deletion core/src/codecs/default/DefaultVectorIndexFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ class DefaultVectorIndexFormat : public VectorIndexFormat {
read(const storage::FSHandlerPtr& fs_ptr, const std::string& location,
segment::VectorIndexPtr& vector_index) override;

void
read(const storage::FSHandlerPtr& fs_ptr, const std::string& location, knowhere::BinaryPtr raw_data,
segment::VectorIndexPtr& vector_index) override;

void
write(const storage::FSHandlerPtr& fs_ptr, const std::string& location,
const segment::VectorIndexPtr& vector_index) override;
Expand All @@ -48,7 +52,7 @@ class DefaultVectorIndexFormat : public VectorIndexFormat {

private:
knowhere::VecIndexPtr
read_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& path);
read_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& path, knowhere::BinaryPtr raw_data = nullptr);

private:
std::mutex mutex_;
Expand Down
52 changes: 52 additions & 0 deletions core/src/codecs/default/DefaultVectorsFormat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <fcntl.h>
#include <unistd.h>
#include <algorithm>
#include <memory>

#include <boost/filesystem.hpp>

Expand Down Expand Up @@ -53,6 +54,30 @@ DefaultVectorsFormat::read_vectors_internal(const storage::FSHandlerPtr& fs_ptr,
fs_ptr->reader_ptr_->close();
}

// Reads a whole raw-vector file into a freshly allocated knowhere::Binary.
//
// The file starts with a size_t byte count followed by that many payload bytes. On return,
// raw_vectors points at a new Binary whose `size` is that count and whose `data` buffer has been
// filled from the payload.
//
// Throws Exception(SERVER_CANNOT_OPEN_FILE) when the reader cannot open file_path.
void
DefaultVectorsFormat::read_vectors_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
                                            knowhere::BinaryPtr& raw_vectors) {
    if (!fs_ptr->reader_ptr_->open(file_path.c_str())) {
        std::string err_msg = "Failed to open file: " + file_path + ", error: " + std::strerror(errno);
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_CANNOT_OPEN_FILE, err_msg);
    }

    // Zero-initialized so that a failed or short header read cannot leave an indeterminate
    // value to be used as the allocation size below.
    size_t num_bytes = 0;
    fs_ptr->reader_ptr_->read(&num_bytes, sizeof(size_t));

    raw_vectors = std::make_shared<knowhere::Binary>();
    raw_vectors->size = num_bytes;
    raw_vectors->data = std::shared_ptr<uint8_t[]>(new uint8_t[num_bytes]);

    // Beginning of file is num_bytes; position the reader just past that header.
    fs_ptr->reader_ptr_->seekg(sizeof(size_t));

    fs_ptr->reader_ptr_->read(raw_vectors->data.get(), num_bytes);

    fs_ptr->reader_ptr_->close();
}

void
DefaultVectorsFormat::read_uids_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
std::vector<segment::doc_id_t>& uids) {
Expand Down Expand Up @@ -157,6 +182,32 @@ DefaultVectorsFormat::read_uids(const storage::FSHandlerPtr& fs_ptr, std::vector
const auto& path = it->path();
if (path.extension().string() == user_id_extension_) {
read_uids_internal(fs_ptr, path.string(), uids);
break;
}
}
}

// Loads the segment's raw vectors into `raw_vectors`.
//
// Walks the directory reported by fs_ptr->operation_ptr_ and hands the first entry whose
// extension equals raw_vector_extension_ to read_vectors_internal(). If no entry matches,
// raw_vectors is left untouched. The whole call is serialized on mutex_.
//
// Throws Exception(SERVER_INVALID_ARGUMENT) when that path is not an existing directory.
void
DefaultVectorsFormat::read_vectors(const storage::FSHandlerPtr& fs_ptr, knowhere::BinaryPtr& raw_vectors) {
    const std::lock_guard<std::mutex> lock(mutex_);

    std::string dir_path = fs_ptr->operation_ptr_->GetDirectory();
    if (!boost::filesystem::is_directory(dir_path)) {
        // Keep a space between the path and the trailing text so the message stays readable.
        std::string err_msg = "Directory: " + dir_path + " does not exist";
        LOG_ENGINE_ERROR_ << err_msg;
        throw Exception(SERVER_INVALID_ARGUMENT, err_msg);
    }

    boost::filesystem::path target_path(dir_path);
    typedef boost::filesystem::directory_iterator d_it;
    d_it it_end;
    d_it it(target_path);
    for (; it != it_end; ++it) {
        const auto& path = it->path();
        if (path.extension().string() == raw_vector_extension_) {
            read_vectors_internal(fs_ptr, path.string(), raw_vectors);
            // Only one raw-vector file is loaded; stop at the first match.
            break;
        }
    }
}
Expand All @@ -182,6 +233,7 @@ DefaultVectorsFormat::read_vectors(const storage::FSHandlerPtr& fs_ptr, off_t of
const auto& path = it->path();
if (path.extension().string() == raw_vector_extension_) {
read_vectors_internal(fs_ptr, path.string(), offset, num_bytes, raw_vectors);
break;
}
}
}
Expand Down
7 changes: 7 additions & 0 deletions core/src/codecs/default/DefaultVectorsFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ class DefaultVectorsFormat : public VectorsFormat {
void
read_uids(const storage::FSHandlerPtr& fs_ptr, std::vector<segment::doc_id_t>& uids) override;

void
read_vectors(const storage::FSHandlerPtr& fs_ptr, knowhere::BinaryPtr& raw_vectors) override;

void
read_vectors(const storage::FSHandlerPtr& fs_ptr, off_t offset, size_t num_bytes,
std::vector<uint8_t>& raw_vectors) override;
Expand All @@ -58,6 +61,10 @@ class DefaultVectorsFormat : public VectorsFormat {
read_vectors_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path, off_t offset, size_t num,
std::vector<uint8_t>& raw_vectors);

void
read_vectors_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
knowhere::BinaryPtr& raw_vectors);

void
read_uids_internal(const storage::FSHandlerPtr& fs_ptr, const std::string& file_path,
std::vector<segment::doc_id_t>& uids);
Expand Down
12 changes: 11 additions & 1 deletion core/src/db/engine/ExecutionEngineImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ IsBinaryIndexType(knowhere::IndexType type) {
return type == knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP || type == knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT;
}

// Reports whether `type` is one of the engine types (FAISS_IVFFLAT, HNSW, NSG_MIX) for which
// Load() reads the vector index together with the segment's raw data.
bool
IndexSupportOffset(EngineType type) {
    switch (type) {
        case EngineType::FAISS_IVFFLAT:
        case EngineType::HNSW:
        case EngineType::NSG_MIX:
            return true;
        default:
            return false;
    }
}

} // namespace

#ifdef MILVUS_GPU_VERSION
Expand Down Expand Up @@ -448,7 +453,12 @@ ExecutionEngineImpl::Load(bool to_cache) {
try {
segment::SegmentPtr segment_ptr;
segment_reader_ptr->GetSegment(segment_ptr);
auto status = segment_reader_ptr->LoadVectorIndex(location_, segment_ptr->vector_index_ptr_);
if (IndexSupportOffset(index_type_)) {
auto status =
segment_reader_ptr->LoadVectorIndexWithRawData(location_, segment_ptr->vector_index_ptr_);
} else {
auto status = segment_reader_ptr->LoadVectorIndex(location_, segment_ptr->vector_index_ptr_);
}
index_ = segment_ptr->vector_index_ptr_->GetVectorIndex();

if (index_ == nullptr) {
Expand Down
24 changes: 17 additions & 7 deletions core/src/index/knowhere/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ set(external_srcs
knowhere/common/Timer.cpp
)

set(index_srcs
set(vector_index_srcs
knowhere/index/vector_index/adapter/VectorAdapter.cpp
knowhere/index/vector_index/helpers/FaissIO.cpp
knowhere/index/vector_index/helpers/IndexParameter.cpp
Expand All @@ -56,23 +56,28 @@ set(index_srcs
knowhere/index/vector_index/FaissBaseIndex.cpp
knowhere/index/vector_index/IndexBinaryIDMAP.cpp
knowhere/index/vector_index/IndexBinaryIVF.cpp
knowhere/index/vector_index/IndexHNSW.cpp
knowhere/index/vector_index/IndexIDMAP.cpp
knowhere/index/vector_index/IndexIVF.cpp
knowhere/index/vector_index/IndexIVFPQ.cpp
knowhere/index/vector_index/IndexIVFSQ.cpp
knowhere/index/vector_index/IndexNSG.cpp
knowhere/index/vector_index/IndexType.cpp
knowhere/index/vector_index/VecIndexFactory.cpp
knowhere/index/vector_index/IndexAnnoy.cpp
)

set(vector_offset_index_srcs
knowhere/index/vector_offset_index/OffsetBaseIndex.cpp
knowhere/index/vector_offset_index/IndexIVF_NM.cpp
knowhere/index/vector_offset_index/IndexHNSW_NM.cpp
knowhere/index/vector_offset_index/IndexNSG_NM.cpp
)

if (MILVUS_SUPPORT_SPTAG)
set(index_srcs
set(vector_index_srcs
knowhere/index/vector_index/adapter/SptagAdapter.cpp
knowhere/index/vector_index/helpers/SPTAGParameterMgr.cpp
knowhere/index/vector_index/IndexSPTAG.cpp
${index_srcs}
${vector_index_srcs}
)
endif ()

Expand Down Expand Up @@ -117,7 +122,7 @@ if (MILVUS_GPU_VERSION)
${cuda_lib}
)

set(index_srcs ${index_srcs}
set(vector_index_srcs ${vector_index_srcs}
knowhere/index/vector_index/gpu/IndexGPUIDMAP.cpp
knowhere/index/vector_index/gpu/IndexGPUIVF.cpp
knowhere/index/vector_index/gpu/IndexGPUIVFPQ.cpp
Expand All @@ -126,13 +131,18 @@ if (MILVUS_GPU_VERSION)
knowhere/index/vector_index/helpers/Cloner.cpp
knowhere/index/vector_index/helpers/FaissGpuResourceMgr.cpp
)

set(vector_offset_index_srcs ${vector_offset_index_srcs}
knowhere/index/vector_offset_index/gpu/IndexGPUIVF_NM.cpp
)
endif ()

if (NOT TARGET knowhere)
add_library(
knowhere STATIC
${external_srcs}
${index_srcs}
${vector_index_srcs}
${vector_offset_index_srcs}
)
endif ()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,8 @@ NSG::Query(const DatasetPtr& dataset_ptr, const Config& config) {
s_params.k = config[meta::TOPK];
{
std::lock_guard<std::mutex> lk(mutex_);
index_->Search((float*)p_data, rows, dim, config[meta::TOPK].get<int64_t>(), p_dist, p_id, s_params,
blacklist);
index_->Search((float*)p_data, nullptr, rows, dim, config[meta::TOPK].get<int64_t>(), p_dist, p_id,
s_params, blacklist);
}

auto ret_ds = std::make_shared<Dataset>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
namespace milvus {
namespace knowhere {

#define RAW_DATA "RAW_DATA"

class VecIndex : public Index {
public:
virtual void
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@
#include "knowhere/index/vector_index/IndexAnnoy.h"
#include "knowhere/index/vector_index/IndexBinaryIDMAP.h"
#include "knowhere/index/vector_index/IndexBinaryIVF.h"
#include "knowhere/index/vector_index/IndexHNSW.h"
#include "knowhere/index/vector_index/IndexIDMAP.h"
#include "knowhere/index/vector_index/IndexIVF.h"
#include "knowhere/index/vector_index/IndexIVFPQ.h"
#include "knowhere/index/vector_index/IndexIVFSQ.h"
#include "knowhere/index/vector_index/IndexNSG.h"
#include "knowhere/index/vector_offset_index/IndexHNSW_NM.h"
#include "knowhere/index/vector_offset_index/IndexIVF_NM.h"
#include "knowhere/index/vector_offset_index/IndexNSG_NM.h"
#ifdef MILVUS_SUPPORT_SPTAG
#include "knowhere/index/vector_index/IndexSPTAG.h"
#endif
Expand All @@ -34,6 +35,7 @@
#include "knowhere/index/vector_index/gpu/IndexGPUIVFSQ.h"
#include "knowhere/index/vector_index/gpu/IndexIVFSQHybrid.h"
#include "knowhere/index/vector_index/helpers/Cloner.h"
#include "knowhere/index/vector_offset_index/gpu/IndexGPUIVF_NM.h"
#endif

namespace milvus {
Expand All @@ -47,10 +49,10 @@ VecIndexFactory::CreateVecIndex(const IndexType& type, const IndexMode mode) {
} else if (type == IndexEnum::INDEX_FAISS_IVFFLAT) {
#ifdef MILVUS_GPU_VERSION
if (mode == IndexMode::MODE_GPU) {
return std::make_shared<knowhere::GPUIVF>(gpu_device);
return std::make_shared<knowhere::GPUIVF_NM>(gpu_device);
}
#endif
return std::make_shared<knowhere::IVF>();
return std::make_shared<knowhere::IVF_NM>();
} else if (type == IndexEnum::INDEX_FAISS_IVFPQ) {
#ifdef MILVUS_GPU_VERSION
if (mode == IndexMode::MODE_GPU) {
Expand All @@ -74,15 +76,15 @@ VecIndexFactory::CreateVecIndex(const IndexType& type, const IndexMode mode) {
} else if (type == IndexEnum::INDEX_FAISS_BIN_IVFFLAT) {
return std::make_shared<knowhere::BinaryIVF>();
} else if (type == IndexEnum::INDEX_NSG) {
return std::make_shared<knowhere::NSG>(-1);
return std::make_shared<knowhere::NSG_NM>(-1);
#ifdef MILVUS_SUPPORT_SPTAG
} else if (type == IndexEnum::INDEX_SPTAG_KDT_RNT) {
return std::make_shared<knowhere::CPUSPTAGRNG>("KDT");
} else if (type == IndexEnum::INDEX_SPTAG_BKT_RNT) {
return std::make_shared<knowhere::CPUSPTAGRNG>("BKT");
#endif
} else if (type == IndexEnum::INDEX_HNSW) {
return std::make_shared<knowhere::IndexHNSW>();
return std::make_shared<knowhere::IndexHNSW_NM>();
} else if (type == IndexEnum::INDEX_ANNOY) {
return std::make_shared<knowhere::IndexAnnoy>();
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "knowhere/index/vector_index/gpu/GPUIndex.h"
#include "knowhere/index/vector_index/gpu/IndexGPUIVF.h"
#include "knowhere/index/vector_index/gpu/IndexIVFSQHybrid.h"
#include "knowhere/index/vector_offset_index/IndexIVF_NM.h"

namespace milvus {
namespace knowhere {
Expand Down Expand Up @@ -50,6 +51,8 @@ CopyCpuToGpu(const VecIndexPtr& index, const int64_t device_id, const Config& co
VecIndexPtr result;
if (auto device_index = std::dynamic_pointer_cast<IVFSQHybrid>(index)) {
result = device_index->CopyCpuToGpu(device_id, config);
} else if (auto cpu_index = std::dynamic_pointer_cast<IVF_NM>(index)) {
result = cpu_index->CopyCpuToGpu(device_id, config);
} else if (auto device_index = std::dynamic_pointer_cast<GPUIndex>(index)) {
result = device_index->CopyGpuToGpu(device_id, config);
} else if (auto cpu_index = std::dynamic_pointer_cast<IVFSQ>(index)) {
Expand Down
Loading

0 comments on commit 84c7701

Please sign in to comment.