Skip to content

Commit

Permalink
test: add test case for bulkwriter (#33879)
Browse files Browse the repository at this point in the history
Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
  • Loading branch information
zhuwenxing committed Jun 20, 2024
1 parent 0264588 commit f3d902c
Show file tree
Hide file tree
Showing 3 changed files with 390 additions and 40 deletions.
16 changes: 8 additions & 8 deletions tests/python_client/common/bulk_insert_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@
class DataField:
pk_field = "uid"
vec_field = "vectors"
float_vec_field = "float_vectors"
float_vec_field = "float32_vectors"
sparse_vec_field = "sparse_vectors"
image_float_vec_field = "image_float_vec_field"
text_float_vec_field = "text_float_vec_field"
binary_vec_field = "binary_vec_field"
bf16_vec_field = "bf16_vec_field"
fp16_vec_field = "fp16_vec_field"
bf16_vec_field = "brain_float16_vec_field"
fp16_vec_field = "float16_vec_field"
int_field = "int_scalar"
string_field = "string_scalar"
bool_field = "bool_scalar"
Expand Down Expand Up @@ -504,16 +504,16 @@ def gen_data_by_data_field(data_field, rows, start=0, float_vector=True, dim=128
data = []
if rows > 0:
if "vec" in data_field:
if "float" in data_field:
if "float" in data_field and "16" not in data_field:
data = gen_vectors(float_vector=True, rows=rows, dim=dim)
data = pd.Series([np.array(x, dtype=np.dtype("float32")) for x in data])
elif "sparse" in data_field:
data = gen_sparse_vectors(rows, sparse_format=sparse_format)
data = pd.Series([json.dumps(x) for x in data], dtype=np.dtype("str"))
elif "fp16" in data_field:
elif "float16" in data_field:
data = gen_fp16_vectors(rows, dim)[1]
data = pd.Series([np.array(x, dtype=np.dtype("uint8")) for x in data])
elif "bf16" in data_field:
elif "brain_float16" in data_field:
data = gen_bf16_vectors(rows, dim)[1]
data = pd.Series([np.array(x, dtype=np.dtype("uint8")) for x in data])
elif "binary" in data_field:
Expand Down Expand Up @@ -758,10 +758,10 @@ def gen_npy_files(float_vector, rows, dim, data_fields, file_size=None, file_num
if "binary" in data_field:
float_vector = False
vector_type = "binary"
if "bf16" in data_field:
if "brain_float16" in data_field:
float_vector = True
vector_type = "bf16"
if "fp16" in data_field:
if "float16" in data_field:
float_vector = True
vector_type = "fp16"

Expand Down
24 changes: 12 additions & 12 deletions tests/python_client/common/common_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -2114,19 +2114,19 @@ def gen_fp16_vectors(num, dim):
return raw_vectors, fp16_vectors


def gen_sparse_vectors(nb, dim=1000, sparse_format="dok"):
    """Generate ``nb`` random sparse vectors.

    Each vector gets between 20 and 30 distinct non-zero dimensions drawn
    from ``range(dim)``; when ``dim`` is smaller than the drawn count, the
    count is clamped to ``dim`` so that small dimensions do not make
    ``random.sample`` fail.

    Args:
        nb: number of vectors to generate.
        dim: size of the index space the non-zero positions are drawn from.
        sparse_format: ``"dok"`` (dictionary of keys, the default) returns
            ``{index: value}`` dicts; ``"coo"`` (coordinate list) returns
            ``{"indices": [...], "values": [...]}`` dicts. Any other value
            yields the ``"dok"`` layout.

    Returns:
        A list of ``nb`` sparse vectors in the requested layout.

    Raises:
        ValueError: if ``dim`` is smaller than 1.
    """
    if dim < 1:
        raise ValueError(f"dim must be a positive integer, got {dim}")

    rng = np.random.default_rng()
    vectors = []
    for _ in range(nb):
        # Never ask for more distinct indices than the index space holds.
        non_zero_count = min(random.randint(20, 30), dim)
        indices = random.sample(range(dim), non_zero_count)
        vectors.append({d: rng.random() for d in indices})

    if sparse_format == "coo":
        vectors = [
            {"indices": list(x.keys()), "values": list(x.values())} for x in vectors
        ]
    return vectors


def gen_vectors_based_on_vector_type(num, dim, vector_data_type):
Expand Down
Loading

0 comments on commit f3d902c

Please sign in to comment.