In [2]:
from pymilvus import connections, db

conn = connections.connect(host="127.0.0.1", port=19530)

# database = db.create_database("faceid")

In [3]:
print(db.list_database())
db.drop_database("books")
print(db.list_database())

['default', 'faceid_test', 'faceid']
['default', 'faceid_test', 'faceid']


In [4]:
db.using_database("faceid")

## create collection

### Prepare Schema

In [5]:
from pymilvus import CollectionSchema, FieldSchema, DataType

In [6]:
_code = FieldSchema(
    name="ID", 
    dtype=DataType.VARCHAR,
    max_length=100,
    is_primary=True,
)
_name = FieldSchema(
    name = "Name_ID",
    dtype=DataType.VARCHAR,
    max_length=200,
)
_department = FieldSchema(
    name="Department", 
    dtype=DataType.VARCHAR,
    max_length=200,
)
_vn_name = FieldSchema(
    name = "Name_VN",
    dtype=DataType.VARCHAR,
    max_length=200,
)
_embedder = FieldSchema(
    name="Embeddings", 
    dtype=DataType.FLOAT_VECTOR, 
    dim=8
)

In [7]:
schema = CollectionSchema(
  fields=[_code, _name,_department,_vn_name,_embedder],
  description="Faceid search",
  enable_dynamic_field=True
)

In [8]:
schema

{'auto_id': False, 'description': 'Faceid search', 'fields': [{'name': 'ID', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 100}, 'is_primary': True, 'auto_id': False}, {'name': 'Name_ID', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 200}}, {'name': 'Department', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 200}}, {'name': 'Name_VN', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 200}}, {'name': 'Embeddings', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 8}}], 'enable_dynamic_field': True}

## Create a collection with the schema

In [9]:
from pymilvus import utility
utility.drop_collection("faceid_collection")

In [10]:
from pymilvus import Collection

collection_name = "faceid_collection"

collection = Collection(
    name=collection_name,
    schema=schema,
    using='default',
    shards_num=2
    )

In [10]:
collection

<Collection>:
-------------
<name>: faceid_collection
<description>: Faceid search
<schema>: {'auto_id': False, 'description': 'Faceid search', 'fields': [{'name': 'ID', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 100}, 'is_primary': True, 'auto_id': False}, {'name': 'Name_ID', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 200}}, {'name': 'Department', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 200}}, {'name': 'Name_VN', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 200}}, {'name': 'Embeddings', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 8}}], 'enable_dynamic_field': True}

## Inserts vectors in the collection:

In [11]:
import csv
from typing import Dict, List

In [12]:
with open("./namelists.csv", encoding="utf8") as f:
    reader = csv.DictReader(f)
    members: List[Dict[str, str]] = [row for row in reader]
    print(members)

[{'Name_ID': 'Nguyễn_Văn_Thiên_NS0110_PVHHT_ICARE', 'ID': 'NS0110', 'Name_VN': 'Nguyễn Văn Thiên', 'Department': 'PVHHT_ICARE'}, {'Name_ID': 'Trần_Quang_Duy_NS0174_PM_SDS', 'ID': 'NS0174', 'Name_VN': 'Trần Quang Duy', 'Department': 'PM_SDS'}]


In [13]:
from pymilvus import Collection
collection = Collection("faceid_collection")

In [22]:
import random
[[random.random() for _ in range(5)] for _ in range(2)]

[[0.33355039700297895,
  0.2209422033993741,
  0.24411143132708624,
  0.784794741706548,
  0.02315318035210223],
 [0.0005815413143029913,
  0.21328311446482218,
  0.6760600800138975,
  0.1732470193501403,
  0.07503795732906815]]

In [25]:
entities = [
    ["NS0110","NS0174"],
    ["Nguyễn_Văn_Thiên_NS0110_PVHHT_ICARE","Trần_Quang_Duy_NS0174_PM_SDS"],
    ["PVHHT_ICARE","PM_SDS"],
    ["Nguyễn Văn Thiên","Trần Quang Duy"],
    [[random.random() for _ in range(8)] for _ in range(2)]
]

In [26]:
collection.insert(entities)

(insert count: 2, delete count: 0, upsert count: 0, timestamp: 442030278610780163, success count: 2, err count: 0)

In [31]:
collection.flush()  

In [34]:
index = {
    "index_type": "IVF_FLAT",
    "metric_type": "L2",
    "params": {"nlist": 128},
}
collection.create_index("Embeddings", index)

Status(code=0, message=)

In [37]:
collection.load()
vectors_to_search = entities[-1][-1:]
vectors_to_search

[[0.6443051335990972,
  0.1429983423274741,
  0.2999680385381044,
  0.7486099005179897,
  0.42066033652851087,
  0.38525663634419605,
  0.16433232963295763,
  0.6314241382886112]]

In [39]:

search_params = {
    "metric_type": "L2",
    "params": {"nprobe": 12},
}
results = collection.search(vectors_to_search, "Embeddings", search_params, limit=5, output_fields=["Code","Name_VN"])

In [40]:
for i, result in enumerate(results):
    print("\nSearch result for {}th vector: ".format(i))
    for j, res in enumerate(result):
        print("Top {}: {}".format(j, res))


Search result for 0th vector: 
Top 0: id: NS0174, distance: 0.0, entity: {'Name_VN': 'Trần Quang Duy'}
Top 1: id: NS0110, distance: 1.3214890956878662, entity: {'Name_VN': 'Nguyễn Văn Thiên'}
