In [6]:
from tqdm import tqdm
from time import perf_counter
from pymilvus import MilvusClient
import polars as pl
import numpy as np
from core_pro.ultilities import make_sync_folder, create_batch_index

In [9]:
path = make_sync_folder('dataset/item_matching')
file = path / 'data_sample_FMCG_clean.parquet'

df = (
    pl.read_parquet(file)
    .with_row_index('id')
)
df.head()

id,index,item_id,item_name,shop_id,shop_name,level1_global_be_category,level2_global_be_category,level3_global_be_category,cluster,description,images,image_url,item_name_clean,file_path
u32,u32,i64,str,i64,str,str,str,str,str,str,str,str,str,str
0,0,19092271907,"""LINEABON K2D3 nhập khẩu châu â…",1053944724,"""ChiChi.Kids17""","""Mom & Baby""","""Baby Healthcare""","""Baby Vitamins & Supplements""","""FMCG""","""Nguồn gốc xuất xứ K2 D3 Lineab…","""vn-11134207-7ras8-m49p5xv9ecb3…","""http://f.shopee.vn/file/vn-111…","""lineabon k2d3 nhập khẩu châu â…","""/media/kevin/data_4t/dataset/i…"
1,1,17398587723,"""[MKB Gift] Bộ 3 Khăn ướt Moony…",63522286,"""Moony - Gian Hàng Chính Hãng""","""Mom & Baby""","""Bath & Body Care""","""Wipes""","""FMCG""","""[{""t"":""Bộ 3 khăn ướt Moony 50 …","""eb26f55f7359e0de333f0c34e2619f…","""http://f.shopee.vn/file/eb26f5…","""bộ 3 khăn ướt moony 50 miếng/g…","""/media/kevin/data_4t/dataset/i…"
2,2,19111299596,"""Bàn Chải Đánh Răng Lipzo Sensi…",170502615,"""Niva Lipzo Official Store""","""Health""","""Personal Care""","""Oral Care""","""FMCG""","""[{""t"":""BÀN CHẢI ĐÁNH RĂNG LIPZ…","""vn-11134207-7ras8-m2kwpjg8vb46…","""http://f.shopee.vn/file/vn-111…","""bàn chải đánh răng lipzo sensi…","""/media/kevin/data_4t/dataset/i…"
3,3,25583412760,"""Rong biển ăn liền Bibizan siêu…",119247917,"""King 21""","""Food & Beverages""","""Snacks""","""Seaweed""","""FMCG""","""NGÀY IN TRÊN BAO BÌ LÀ NGÀY SẢ…","""vn-11134207-7r98o-lxqfhxwp3h0b…","""http://f.shopee.vn/file/vn-111…","""rong biển ăn liền bibizan siêu…","""/media/kevin/data_4t/dataset/i…"
4,4,15930150764,"""(MUA 2 SON TẶNG 1 TÚI, 1 CUSHI…",181790483,"""Lam Thảo Cosmetics""","""Beauty""","""Makeup""","""Lips""","""FMCG""","""Son Tint Bóng Espoir Couture L…","""vn-11134207-7r98o-lyrll7tp71y9…","""http://f.shopee.vn/file/vn-111…","""son tint bóng espoir couture l…","""/media/kevin/data_4t/dataset/i…"


In [10]:
item = df['item_name_clean'].to_list()
path_array = path / 'tmp/array/bge/embed.npy'
embeddings = np.load(path_array)

In [11]:
client = MilvusClient("milvus_demo.db")

collection = 'demo_collection'
if client.has_collection(collection_name=collection):
    client.drop_collection(collection_name=collection)
    
client.create_collection(
    collection_name=collection,
    dimension=embeddings.shape[1],
    metric_type="IP"
)

In [12]:
data = df.to_dicts()

In [13]:
start = perf_counter()

total_sample = len(data)
QUERY_SIZE = 20_000
run = create_batch_index(len(embeddings), QUERY_SIZE)
for i, val in run.items():
    start_idx, end_idx = val[0], val[-1]
    batch = data[start_idx:end_idx]
    res = client.insert(collection_name="demo_collection", data=batch)
    
end = perf_counter() - start
print(f'Total Index Time: {end:,.0f}s')

2025-01-14 14:39:32,188 [ERROR][handler]: RPC error: [insert_rows], <DataNotMatchException: (code=1, message=Insert missed an field `vector` to collection without set nullable==true or set default_value)>, <Time:{'RPC start': '2025-01-14 14:39:32.188135', 'RPC error': '2025-01-14 14:39:32.188893'}> (decorators.py:140)


DataNotMatchException: <DataNotMatchException: (code=1, message=Insert missed an field `vector` to collection without set nullable==true or set default_value)>

In [7]:
start = perf_counter()

batch_size = 100
num_batches = (total_sample + batch_size) // batch_size
print(f'Total batches: {num_batches}, Batch size: {batch_size:,.0f}')
for i, idx in tqdm(enumerate(range(num_batches), start=1), total=num_batches):
    start_idx = idx * batch_size
    end_idx = min(start_idx + batch_size, total_sample)
    batch = embeddings[start_idx:end_idx].tolist()
    res = client.search(
        collection_name=collection,
        data=batch,
        limit=10,
        search_params={"metric_type": "IP", "params": {}},
        output_fields=col
    )

end = perf_counter() - start
print(f'Total Query Time: {end:,.0f}s')

Total batches: 722, Batch size: 100


100%|████████████████████████████████████████████████████| 722/722 [01:16<00:00,  9.47it/s]

Total Query Time: 76s





In [10]:
df[start_idx:end_idx].head()

id,q_item_id,q_level1_global_be_category,q_item_name,q_link_first_image,q_item_name_clean,vector
u32,i64,str,str,str,str,"array[f32, 1024]"
72100,29352685061,"""Men Shoes""","""（100% Auth ）Giày Sneaker Nam N…","""https://cf.shopee.sg/file/sg-1…","""（100% auth ）giày sneaker nam n…","[0.039612, -0.024429, … 0.011963]"
72101,25680344702,"""Men Shoes""","""Bình xịt khử mùi giày,tất bình…","""https://cf.shopee.sg/file/vn-1…","""bình xịt khử mùi giày,tất bình…","[0.017593, -0.000049, … 0.007217]"
72102,5465959941,"""Men Shoes""","""Giày thể thao sneaker ultra bo…","""https://cf.shopee.sg/file/f200…","""giày thể thao sneaker ultra bo…","[0.015053, -0.012138, … 0.065979]"
72103,19089276484,"""Men Shoes""","""Xịt khử mùi hôi giày dép công …","""https://cf.shopee.sg/file/vn-1…","""xịt khử mùi hôi giày dép công …","[-0.017456, -0.049622, … 0.01416]"
72104,28402589440,"""Men Shoes""","""（100% Auth ）Giày Sneaker Nam N…","""https://cf.shopee.sg/file/sg-1…","""（100% auth ）giày sneaker nam n…","[-0.010597, -0.008514, … 0.000468]"


In [8]:
res[1]

[{'id': 72101,
  'distance': 0.9998304843902588,
  'entity': {'q_item_id': 25680344702,
   'q_level1_global_be_category': 'Men Shoes',
   'q_item_name': 'Bình xịt khử mùi giày,tất bình xịt thể thao khử mùi hôi chân thơm mát,kháng khuẩn',
   'q_link_first_image': 'https://cf.shopee.sg/file/vn-11134207-7r98o-lv92vrtokxrte1',
   'q_item_name_clean': 'bình xịt khử mùi giày,tất bình xịt thể thao khử mùi hôi chân thơm mát,kháng khuẩn'}},
 {'id': 17097,
  'distance': 0.9708941578865051,
  'entity': {'q_item_id': 26954717704,
   'q_level1_global_be_category': 'Men Shoes',
   'q_item_name': 'Giày nam Da bò Lacos da trơn màu đen bản da cực mềm và êm chân, có kèm ảnh tại shop ảnh khách thử',
   'q_link_first_image': 'https://cf.shopee.sg/file/vn-11134207-7r98o-lxf4462eu9qx7a',
   'q_item_name_clean': 'giày nam da bò lacos da trơn màu đen bản da cực mềm và êm chân, có kèm ảnh tại shop ảnh khách thử'}},
 {'id': 48462,
  'distance': 0.953707218170166,
  'entity': {'q_item_id': 11064010076,
   'q_lev