In [1]:
%pip install pymilvus


Collecting pymilvus
  Using cached pymilvus-2.6.3-py3-none-any.whl.metadata (6.6 kB)
Collecting setuptools>69 (from pymilvus)
  Using cached setuptools-80.9.0-py3-none-any.whl.metadata (6.6 kB)
Collecting grpcio!=1.68.0,!=1.68.1,!=1.69.0,!=1.70.0,!=1.70.1,!=1.71.0,!=1.72.1,!=1.73.0,>=1.66.2 (from pymilvus)
  Using cached grpcio-1.76.0-cp313-cp313-macosx_11_0_universal2.whl.metadata (3.7 kB)
Collecting orjson>=3.10.15 (from pymilvus)
  Using cached orjson-3.11.4-cp313-cp313-macosx_15_0_arm64.whl.metadata (41 kB)
Collecting protobuf>=5.27.2 (from pymilvus)
  Using cached protobuf-6.33.0-cp39-abi3-macosx_10_9_universal2.whl.metadata (593 bytes)
Collecting python-dotenv<2.0.0,>=1.0.1 (from pymilvus)
  Using cached python_dotenv-1.2.1-py3-none-any.whl.metadata (25 kB)
Collecting pandas>=1.2.4 (from pymilvus)
  Using cached pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl.metadata (91 kB)
Collecting typing-extensions~=4.12 (from grpcio!=1.68.0,!=1.68.1,!=1.69.0,!=1.70.0,!=1.70.1,!=1.71.0,!=1.7

### Connect to Milvus

In [2]:
from pymilvus import connections

# Register the "default" connection alias against the Milvus server at 127.0.0.1:19530.
connections.connect(alias="default", host="127.0.0.1", port="19530")


### Create a Collection (like a table)

In [3]:
from pymilvus import FieldSchema, CollectionSchema, DataType, Collection, utility

# Start from a clean slate so this notebook can be re-run safely: if a
# "demo_collection" is left over from an earlier (possibly partial) run,
# constructing Collection(...) below would reuse it and the insert cell would
# append duplicate rows. The notebook already treats this collection as
# disposable (it is dropped in the final cell), so dropping it here is safe.
if utility.has_collection("demo_collection"):
    utility.drop_collection("demo_collection")

# Define schema fields
fields = [
    # Primary key; auto_id=True means ids are generated on insert, not supplied by us.
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
    # Scalar metadata column stored next to each vector.
    FieldSchema(name="title", dtype=DataType.VARCHAR, max_length=200),
    # The vector column; every embedding must have exactly 4 components.
    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=4),
]

schema = CollectionSchema(fields, description="Simple demo collection")

# Create collection
collection = Collection(name="demo_collection", schema=schema)

In [4]:
from pymilvus import utility
# Fetch the names of all collections known to the server, then show them.
collection_names = utility.list_collections()
print(collection_names)

['demo_collection']


In [5]:
# Check whether the demo collection exists on the server.
collection_exists = utility.has_collection("demo_collection")
print(collection_exists)

# Get details about a specific collection: bind a handle to it by name, then
# print its schema, its entity count, and its description, in that order.
collection = Collection("demo_collection")
for attribute in ("schema", "num_entities", "description"):
    print(getattr(collection, attribute))

True
{'auto_id': True, 'description': 'Simple demo collection', 'fields': [{'name': 'id', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': True}, {'name': 'title', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 200}}, {'name': 'embedding', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 4}}], 'enable_dynamic_field': False}
0
Simple demo collection


### Insert Data (like INSERT INTO)

In [6]:
# Column-oriented payload: one list of titles and one list of 4-dimensional
# embeddings, aligned by position (the i-th title belongs to the i-th vector).
# No "id" column is supplied because the schema declares it with auto_id=True.
titles = ["Intro to AI", "Deep Learning", "Vector Databases"]
embeddings = [
    [0.1, 0.2, 0.3, 0.4],
    [0.2, 0.3, 0.4, 0.5],
    [0.3, 0.4, 0.5, 0.6],
]
data = [titles, embeddings]

collection.insert(data)
collection.flush()

entity_count = collection.num_entities
print(f"Number of entities after insertion: {entity_count}")

Number of entities after insertion: 3


### Create an Index (for fast vector search)

In [8]:
# Index settings for the vector field:
#   metric_type - distance measure used to compare vectors ("L2" here; "COSINE" is an alternative)
#   index_type  - IVF_FLAT, an approximate-search index
#   params      - index-specific tuning; nlist is set to 64
index_params = dict(
    metric_type="L2",
    index_type="IVF_FLAT",
    params=dict(nlist=64),
)

# Kept as the last expression so the cell displays the returned status.
collection.create_index(field_name="embedding", index_params=index_params)

Status(code=0, message=)

### Query Metadata (like SQL SELECT * WHERE)

In [9]:
# Load the collection before running the query.
collection.load()

# An empty expr applies no filter, so up to `limit` rows are returned.
# To filter instead, use an expression such as "title == 'Deep Learning'".
query_options = {
    "expr": "",
    "output_fields": ["title", "embedding"],
    "limit": 10,
}
results = collection.query(**query_options)

for row in results:
    print(row)

{'title': 'Intro to AI', 'embedding': [0.10000000149011612, 0.20000000298023224, 0.30000001192092896, 0.4000000059604645], 'id': 462066725594345875}
{'title': 'Deep Learning', 'embedding': [0.20000000298023224, 0.30000001192092896, 0.4000000059604645, 0.5], 'id': 462066725594345876}
{'title': 'Vector Databases', 'embedding': [0.30000001192092896, 0.4000000059604645, 0.5, 0.6000000238418579], 'id': 462066725594345877}
{'title': 'Intro to AI', 'embedding': [0.10000000149011612, 0.20000000298023224, 0.30000001192092896, 0.4000000059604645], 'id': 462066725594345878}
{'title': 'Deep Learning', 'embedding': [0.20000000298023224, 0.30000001192092896, 0.4000000059604645, 0.5], 'id': 462066725594345879}
{'title': 'Vector Databases', 'embedding': [0.30000001192092896, 0.4000000059604645, 0.5, 0.6000000238418579], 'id': 462066725594345880}


### Search by Vector (the heart of Milvus)

In [10]:
import numpy as np

# A batch containing a single 4-dimensional query vector.
query_vector = [[0.15, 0.25, 0.35, 0.45]]

# Same metric as the index on "embedding"; nprobe is the search-time tuning knob.
search_params = {"metric_type": "L2", "params": {"nprobe": 10}}

# Rough SQL analogy:
#   SELECT title FROM demo_collection
#   WHERE embedding is nearest to query_vector   -- keep the 2 closest
results = collection.search(
    data=query_vector,
    anns_field="embedding",  # vector field to run the approximate nearest-neighbour search on
    param=search_params,
    limit=2,
    output_fields=["title"],
)

# results holds one list of hits per query vector; only one vector was sent.
nearest_hits = results[0]
for hit in nearest_hits:
    matched_title = hit.entity.get('title')
    print(f"Matched: {matched_title}, Distance: {hit.distance}")

Matched: Intro to AI, Distance: 0.009999996051192284
Matched: Deep Learning, Distance: 0.01000000350177288


### List & Drop Collections

In [11]:
from pymilvus import utility

# Snapshot the collection names, drop the demo collection, then list again to
# confirm that it is gone.
names_before = utility.list_collections()
print(f"List of collection BEFORE dropping the collection: {names_before}")

utility.drop_collection("demo_collection")

names_after = utility.list_collections()
print(f"List of collection after dropping the collection: {names_after}")


List of collection BEFORE dropping the collection: ['demo_collection']
List of collection after dropping the collection: []
