Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sparse vector support #285

Merged
merged 14 commits into from
Mar 19, 2024
4 changes: 4 additions & 0 deletions milvus/HttpClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ export class HttpBaseClient {
Authorization: this.authorization,
Accept: 'application/json',
ContentType: 'application/json',
'Accept-Type-Allow-Int64':
typeof this.config.acceptInt64 !== 'undefined'
? this.config.acceptInt64.toString()
: 'false',
};
}

Expand Down
8 changes: 8 additions & 0 deletions milvus/const/milvus.ts
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ export enum IndexType {
DISKANN = 'DISKANN',
AUTOINDEX = 'AUTOINDEX',
ANNOY = 'ANNOY',
SPARSE_INVERTED_INDEX = 'SPARSE_INVERTED_INDEX',
SPARSE_WAND = 'SPARSE_WAND',
// 2.3
GPU_FLAT = 'GPU_FLAT',
GPU_IVF_FLAT = 'GPU_IVF_FLAT',
Expand Down Expand Up @@ -264,6 +266,9 @@ export enum DataType {

BinaryVector = 100,
FloatVector = 101,
Float16Vector = 102,
BFloat16Vector = 103,
SparseFloatVector = 104,
}

// data type map
Expand All @@ -282,6 +287,9 @@ export const DataTypeMap: { [key in keyof typeof DataType]: number } = {
JSON: 23,
BinaryVector: 100,
FloatVector: 101,
Float16Vector: 102,
BFloat16Vector: 103,
SparseFloatVector: 104,
};

// RBAC: operate user role type
Expand Down
102 changes: 67 additions & 35 deletions milvus/grpc/Data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,19 @@ import {
CountReq,
CountResult,
DEFAULT_COUNT_QUERY_STRING,
SparseFloatVectors,
parseSparseRowsToBytes,
getSparseDim,
} from '../';
import { Collection } from './Collection';

export class Data extends Collection {
// vectorTypes
vectorTypes = [DataType.BinaryVector, DataType.FloatVector];
vectorTypes = [
DataType.BinaryVector,
DataType.FloatVector,
DataType.SparseFloatVector,
];

/**
* Upsert data into Milvus, view _insert for detail
Expand Down Expand Up @@ -209,49 +216,74 @@ export class Data extends Collection {
const elementType = DataTypeMap[field.elementType!];
const elementTypeKey = getDataKey(elementType);

// build key value
let keyValue;
switch (type) {
case DataType.FloatVector:
keyValue = {
dim: field.dim,
[dataKey]: {
data: field.data,
},
};
break;
case DataType.BinaryVector:
keyValue = {
dim: field.dim,
[dataKey]: parseBinaryVectorToBytes(field.data as BinaryVectors),
};
break;
case DataType.SparseFloatVector:
const dim = getSparseDim(field.data as SparseFloatVectors[]);
keyValue = {
dim,
[dataKey]: {
dim,
contents: parseSparseRowsToBytes(
field.data as SparseFloatVectors[]
),
},
};
break;

case DataType.Array:
keyValue = {
[dataKey]: {
data: field.data.map(d => {
return {
[elementTypeKey]: {
type: elementType,
data: d,
},
};
}),
element_type: elementType,
},
};
break;
default:
keyValue = {
[dataKey]: {
data: field.data,
},
};
break;
}

return {
type,
field_name: field.name,
is_dynamic: field.name === DEFAULT_DYNAMIC_FIELD,
[key]:
type === DataType.FloatVector
? {
dim: field.dim,
[dataKey]: {
data: field.data,
},
}
: type === DataType.BinaryVector
? {
dim: field.dim,
[dataKey]: parseBinaryVectorToBytes(
field.data as BinaryVectors
),
}
: type === DataType.Array
? {
[dataKey]: {
data: field.data.map(d => {
return {
[elementTypeKey]: {
type: elementType,
data: d,
},
};
}),
element_type: elementType,
},
}
: {
[dataKey]: {
data: field.data,
},
},
[key]: keyValue,
};
});

// if timeout is not defined, set timeout to 0
const timeout = typeof data.timeout === 'undefined' ? 0 : data.timeout;
// delete data
try {
delete params.data;
} catch (e) {}
// execute Insert
const promise = await promisify(
this.channelPool,
Expand Down
3 changes: 2 additions & 1 deletion milvus/types/Data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ import {
// all value types supported by milvus
export type FloatVectors = number[];
export type BinaryVectors = number[];
export type VectorTypes = FloatVectors | BinaryVectors;
export type SparseFloatVectors = { [key: string]: number };
export type VectorTypes = FloatVectors | BinaryVectors | SparseFloatVectors;
export type Bool = boolean;
export type Int8 = number;
export type Int16 = number;
Expand Down
4 changes: 3 additions & 1 deletion milvus/types/Http.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ export type Constructor<T = {}> = new (...args: any[]) => T;
export type FetchOptions = {
abortController: AbortController;
timeout: number;
}
};

type HttpClientConfigBase = {
// database name
Expand All @@ -23,6 +23,8 @@ type HttpClientConfigBase = {
timeout?: number;
// alternative fetch api
fetch?: Fetch;
// accept int64
acceptInt64?: boolean;
};

type HttpClientConfigAddress = HttpClientConfigBase & {
Expand Down
15 changes: 0 additions & 15 deletions milvus/utils/Blob.ts

This file was deleted.

157 changes: 157 additions & 0 deletions milvus/utils/Bytes.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
import { Root } from 'protobufjs';
import {
FloatVectors,
BinaryVectors,
SparseFloatVectors,
DataType,
VectorTypes,
} from '..';

/**
 * Serializes a float vector as the raw bytes of its 32-bit float elements.
 *
 * @param {FloatVectors} array - The float vector to serialize.
 *
 * @returns {Buffer} A Buffer over the Float32Array storage built from `array`.
 */
export const parseFloatVectorToBytes = (array: FloatVectors) => {
  // pack the numbers as float32 and hand the backing storage to Buffer,
  // since the milvus proto expects a bytes payload
  const { buffer } = new Float32Array(array);
  return Buffer.from(buffer);
};

/**
 * Serializes a binary vector as bytes, one unsigned 8-bit element per entry.
 *
 * @param {BinaryVectors} array - The binary vector to serialize.
 *
 * @returns {Buffer} A Buffer over the Uint8Array storage built from `array`.
 */
export const parseBinaryVectorToBytes = (array: BinaryVectors) => {
  // pack the numbers as uint8 and hand the backing storage to Buffer,
  // since the milvus proto expects a bytes payload
  const { buffer } = new Uint8Array(array);
  return Buffer.from(buffer);
};

/**
 * Converts a sparse float vector into bytes format.
 *
 * Each `index: value` entry of `data` becomes 8 bytes: the index as an
 * unsigned 32-bit integer followed by the value as a 32-bit float. Both are
 * written little-endian, the byte order `parseBufferToSparseRow` reads back.
 * Entries are emitted in `Object.entries` order.
 *
 * @param {SparseFloatVectors} data - The sparse float vector to convert.
 *
 * @returns {Uint8Array} Bytes representing the sparse float vector
 * (8 bytes per entry, empty for an empty vector).
 * @throws {Error} If an index is not a number within [0, 2^32-1), if an index
 * is not an integer, or if a value is NaN.
 */
export const parseSparseVectorToBytes = (
  data: SparseFloatVectors
): Uint8Array => {
  const entries = Object.entries(data);
  const bytes = new Uint8Array(8 * entries.length);
  // DataView lets us pin the byte order instead of inheriting the host's
  const view = new DataView(bytes.buffer);

  entries.forEach(([key, value], i) => {
    const index = Number(key);
    // NaN (non-numeric key) fails this comparison too, so it is rejected here
    if (!(index >= 0 && index < Math.pow(2, 32) - 1)) {
      throw new Error(
        `Sparse vector index must be positive and less than 2^32-1: ${index}`
      );
    }
    // a fractional index would be silently truncated when stored as uint32
    if (!Number.isInteger(index)) {
      throw new Error(`Sparse vector index must be an integer: ${key}`);
    }
    if (isNaN(value)) {
      throw new Error('Sparse vector value must not be NaN');
    }
    const offset = i * 8;
    view.setUint32(offset, index, true);
    view.setFloat32(offset + 4, value, true);
  });

  return bytes;
};

/**
 * Converts every sparse float vector in `data` to its bytes form, keeping
 * the input order.
 *
 * @param {SparseFloatVectors[]} data - The sparse float vectors to convert.
 *
 * @returns {Uint8Array[]} One byte array per input row, as produced by
 * `parseSparseVectorToBytes`.
 */
export const parseSparseRowsToBytes = (
  data: SparseFloatVectors[]
): Uint8Array[] =>
  // Array.from walks the iterable just like for..of, one row at a time
  Array.from(data, row => parseSparseVectorToBytes(row));

/**
 * Decodes a bytes payload into a sparse row.
 *
 * The buffer is consumed in 8-byte steps: a little-endian unsigned 32-bit
 * index followed by a little-endian 32-bit float value. The index, rendered
 * as a decimal string, becomes the key of the returned object.
 *
 * @param {Buffer} bufferData - The buffer data to decode.
 *
 * @returns {SparseFloatVectors} The decoded index-to-value mapping.
 */
export const parseBufferToSparseRow = (
  bufferData: Buffer
): SparseFloatVectors => {
  const row: SparseFloatVectors = {};
  let offset = 0;
  while (offset < bufferData.length) {
    // index occupies the first 4 bytes of the entry, value the last 4
    const index = bufferData.readUInt32LE(offset);
    row[String(index)] = bufferData.readFloatLE(offset + 4);
    offset += 8;
  }
  return row;
};

/**
 * Builds the encoded placeholder group for a set of vectors.
 *
 * Each vector is serialized with the encoder matching `vectorDataType`
 * (float, binary or sparse float). For any other data type no values are
 * produced and the placeholder is encoded without them.
 *
 * @param {Root} milvusProto - The root object of the Milvus protocol, used to
 * look up `milvus.proto.common.PlaceholderGroup`.
 * @param {VectorTypes[]} vectors - The vectors to place in the group.
 * @param {DataType} vectorDataType - The data type of the vectors.
 *
 * @returns {Uint8Array} The placeholder group in bytes format.
 */
export const buildPlaceholderGroupBytes = (
  milvusProto: Root,
  vectors: VectorTypes[],
  vectorDataType: DataType
) => {
  // serialized vectors; stays undefined when the data type has no encoder
  let values;
  if (vectorDataType === DataType.FloatVector) {
    values = vectors.map(vec => parseFloatVectorToBytes(vec as FloatVectors));
  } else if (vectorDataType === DataType.BinaryVector) {
    values = vectors.map(vec => parseBinaryVectorToBytes(vec as BinaryVectors));
  } else if (vectorDataType === DataType.SparseFloatVector) {
    values = vectors.map(vec =>
      parseSparseVectorToBytes(vec as SparseFloatVectors)
    );
  }

  // resolve the protobuf message type for the placeholder group
  const placeholderGroupType = milvusProto.lookupType(
    'milvus.proto.common.PlaceholderGroup'
  );
  // tag $0 is hard code in milvus, when dsltype is expr
  const message = placeholderGroupType.create({
    placeholders: [
      {
        tag: '$0',
        type: vectorDataType,
        values,
      },
    ],
  });

  return placeholderGroupType.encode(message).finish();
};
Loading
Loading