In [1]:
from pymilvus import connections, db

conn = connections.connect(host="127.0.0.1", port=19530)

# database = db.create_database("faceid")

In [2]:
print(db.list_database())
db.drop_database("books")
print(db.list_database())

['faceid', 'default', 'faceid_test']
['faceid', 'default', 'faceid_test']


In [3]:
db.using_database("faceid")

## create collection

### Prepare Schema

In [4]:
from pymilvus import CollectionSchema, FieldSchema, DataType

In [5]:
_code = FieldSchema(
    name="ID", 
    dtype=DataType.VARCHAR,
    max_length=100,
    is_primary=True,
)
_name = FieldSchema(
    name = "Name_ID",
    dtype=DataType.VARCHAR,
    max_length=200,
)
_department = FieldSchema(
    name="Department", 
    dtype=DataType.VARCHAR,
    max_length=200,
)
_vn_name = FieldSchema(
    name = "Name_VN",
    dtype=DataType.VARCHAR,
    max_length=200,
)
_embedder = FieldSchema(
    name="Embeddings", 
    dtype=DataType.FLOAT_VECTOR, 
    dim=8
)

In [6]:
schema = CollectionSchema(
  fields=[_code, _name,_department,_vn_name,_embedder],
  description="Faceid search",
  enable_dynamic_field=True
)

In [7]:
schema

{'auto_id': False, 'description': 'Faceid search', 'fields': [{'name': 'ID', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 100}, 'is_primary': True, 'auto_id': False}, {'name': 'Name_ID', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 200}}, {'name': 'Department', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 200}}, {'name': 'Name_VN', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 200}}, {'name': 'Embeddings', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 8}}], 'enable_dynamic_field': True}

## Create a collection with the schema

In [8]:
from pymilvus import utility
utility.drop_collection("faceid_collection")

In [9]:
from pymilvus import Collection

collection_name = "faceid_collection"

collection = Collection(
    name=collection_name,
    schema=schema,
    using='default',
    shards_num=2
    )

In [10]:
collection

<Collection>:
-------------
<name>: faceid_collection
<description>: Faceid search
<schema>: {'auto_id': False, 'description': 'Faceid search', 'fields': [{'name': 'ID', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 100}, 'is_primary': True, 'auto_id': False}, {'name': 'Name_ID', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 200}}, {'name': 'Department', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 200}}, {'name': 'Name_VN', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 200}}, {'name': 'Embeddings', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 8}}], 'enable_dynamic_field': True}

## Inserts vectors in the collection:

In [11]:
import csv
from typing import Dict, List

In [12]:
with open("./namelists.csv", encoding="utf8") as f:
    reader = csv.DictReader(f)
    members: List[Dict[str, str]] = [row for row in reader]
    print(members)

[{'Name_ID': 'Nguyễn_Văn_Thiên_NS0110_PVHHT_ICARE', 'ID': 'NS0110', 'Name_VN': 'Nguyễn Văn Thiên', 'Department': 'PVHHT_ICARE'}, {'Name_ID': 'Trần_Quang_Duy_NS0174_PM_SDS', 'ID': 'NS0174', 'Name_VN': 'Trần Quang Duy', 'Department': 'PM_SDS'}]


In [13]:
from pymilvus import Collection
collection = Collection("faceid_collection")

In [14]:
import random
[[random.random() for _ in range(5)] for _ in range(2)]

[[0.494065069627854,
  0.7057794067125425,
  0.3428675086691281,
  0.6631912754899633,
  0.704297306238037],
 [0.9057523271561128,
  0.20648732686821403,
  0.8335198490735616,
  0.8460147096446642,
  0.976302847264862]]

In [15]:
entities = [
    ['NS0110','NS0174'],
    ["Nguyễn_Văn_Thiên_NS0110_PVHHT_ICARE","Trần_Quang_Duy_NS0174_PM_SDS"],
    ["PVHHT_ICARE","PM_SDS"],
    ["Nguyễn Văn Thiên","Trần Quang Duy"],
    [[random.random() for _ in range(8)] for _ in range(2)]
]

In [22]:
[[random.random() for _ in range(8)] for _ in range(2)]

[[0.6465734830110454,
  0.8189648413088378,
  0.5679883184299664,
  0.6365124415114317,
  0.2926076286052547,
  0.5245315617689696,
  0.519597783590062,
  0.591372030133997],
 [0.5149733338231346,
  0.9373265872518305,
  0.7198807814039823,
  0.3993388374267969,
  0.910958669887853,
  0.25712016154662,
  0.7436034277766088,
  0.8485472442869106]]

In [16]:
collection.insert(entities)

(insert count: 2, delete count: 0, upsert count: 0, timestamp: 442035664232382471, success count: 2, err count: 0)

In [17]:
collection.flush()  

In [18]:
index = {
    "index_type": "IVF_FLAT",
    "metric_type": "L2",
    "params": {"nlist": 128},
}
collection.create_index("Embeddings", index)

Status(code=0, message=)

In [19]:
collection.load()
vectors_to_search = entities[-1][-1:]
vectors_to_search

[[0.9342269281885706,
  0.3340299227727074,
  0.04205893992945886,
  0.8911210909677656,
  0.4581711515355674,
  0.3662103311377437,
  0.29456119466464903,
  0.5569652822824508]]

In [26]:
import numpy as np
np.array(vectors_to_search).shape

(1, 8)

In [20]:

search_params = {
    "metric_type": "L2",
    "params": {"nprobe": 12},
}
results = collection.search(vectors_to_search, "Embeddings", search_params, limit=5, output_fields=["Code","Name_VN"])

In [21]:
for i, result in enumerate(results):
    print("\nSearch result for {}th vector: ".format(i))
    for j, res in enumerate(result):
        print("Top {}: {}".format(j, res))


Search result for 0th vector: 
Top 0: id: NS0174, distance: 0.0, entity: {'Name_VN': 'Trần Quang Duy'}
Top 1: id: NS0110, distance: 1.175869107246399, entity: {'Name_VN': 'Nguyễn Văn Thiên'}
