In [1]:
!pip install -U pymilvus

Collecting pymilvus
  Downloading pymilvus-2.5.6-py3-none-any.whl.metadata (5.7 kB)
Collecting grpcio<=1.67.1,>=1.49.1 (from pymilvus)
  Downloading grpcio-1.67.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.9 kB)
Collecting python-dotenv<2.0.0,>=1.0.1 (from pymilvus)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Collecting ujson>=2.0.0 (from pymilvus)
  Downloading ujson-5.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.3 kB)
Collecting milvus-lite>=2.4.0 (from pymilvus)
  Downloading milvus_lite-2.4.12-py3-none-manylinux2014_x86_64.whl.metadata (10.0 kB)
Downloading pymilvus-2.5.6-py3-none-any.whl (223 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m223.4/223.4 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading grpcio-1.67.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.9/5.9 MB[0m [31m51.9 M

In [2]:
from pymilvus import MilvusClient, DataType

# Connect using a local database file name rather than a server address.
client = MilvusClient(uri="sample.db")

In [4]:
# Start from a clean slate so this cell can be re-run: `sample.db` persists
# between kernel sessions, and an existing collection with a different schema
# (or rows left over from a previous run) would otherwise get in the way.
if client.has_collection(collection_name="user_profiles_null"):
    client.drop_collection(collection_name="user_profiles_null")

# Define collection schema.
# NOTE: the schema option is `enable_dynamic_field`; the previously used
# `enable_dynamic_schema` is not a recognised option and was silently ignored.
schema = client.create_schema(
    auto_id=False,
    enable_dynamic_field=True,
)

schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=5)
schema.add_field(field_name="age", datatype=DataType.INT64, nullable=True)  # Nullable field

# Set index params
index_params = client.prepare_index_params()
index_params.add_index(
    field_name="vector",
    index_type="IVF_FLAT",
    metric_type="L2",
    params={"nlist": 128},
)

# Create collection
client.create_collection(
    collection_name="user_profiles_null",
    schema=schema,
    index_params=index_params,
)


In [9]:
# Actually call the method (a bare `client.describe_collection` only shows the
# bound-method repr) to display the schema of the collection created above.
client.describe_collection(collection_name="user_profiles_null")

In [10]:
# Actually call the method (a bare `client.describe_index` only shows the
# bound-method repr). No index name was given when the index was defined, so
# look the names up first and describe each one.
[
    client.describe_index(collection_name="user_profiles_null", index_name=index_name)
    for index_name in client.list_indexes(collection_name="user_profiles_null")
]

In [11]:
# NOTE(review): this only references the bound method (the cell displays its
# repr); nothing is called. Presumably a call with a database name was
# intended — confirm the name and that this local-file backend supports it.
client.describe_database

In [13]:
# Three rows, one for each way the nullable `age` field can be supplied.
data = [
    dict(id=1, vector=[0.1, 0.2, 0.3, 0.4, 0.5], age=30),    # explicit value
    dict(id=2, vector=[0.2, 0.3, 0.4, 0.5, 0.6], age=None),  # explicit None
    dict(id=3, vector=[0.3, 0.4, 0.5, 0.6, 0.7]),            # `age` key left out
]

client.insert(data=data, collection_name="user_profiles_null")

{'insert_count': 3, 'ids': [1, 2, 3], 'cost': 0}

In [14]:
# Vector search: fetch the 2 best matches for one query vector and return
# their `id` and `age` fields.
query_vector = [0.1, 0.2, 0.4, 0.3, 0.128]

res = client.search(
    collection_name="user_profiles_null",
    data=[query_vector],
    output_fields=["id", "age"],
    search_params={"params": {"nprobe": 16}},
    limit=2,
)

print(res)

data: ["[{'id': 1, 'distance': 0.15838398039340973, 'entity': {'age': 30, 'id': 1}}, {'id': 2, 'distance': 0.28278401494026184, 'entity': {'age': 136962888253504, 'id': 2}}]"]


In [15]:
# Scalar query restricted by a comparison on the nullable `age` field.
age_filter = "age >= 0"

results = client.query(
    collection_name="user_profiles_null",
    output_fields=["id", "age"],
    filter=age_filter,
)


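In a filtered query, entities whose `age` is null are expected to be omitted. Note: the recorded output below nevertheless lists ids 2 and 3, with large integer `age` values in place of null.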

In [16]:
# Show the rows returned by the `age >= 0` query.
print(results)

data: ["{'id': 1, 'age': 30}", "{'id': 2, 'age': 136962888253504}", "{'id': 3, 'age': 136962888256576}"]


In [17]:
# Query with an empty filter expression (no filtering condition).
# `limit` is required when `query` is used without a filtering condition.
null_results = client.query(
    collection_name="user_profiles_null",
    output_fields=["id", "age"],
    filter="",
    limit=10,
)

A query without a filter expression returns all entities, including those with null values.

In [18]:
# Show the rows returned by the unfiltered query.
print(null_results)

data: ["{'id': 1, 'age': 30}", "{'id': 2, 'age': 136962888253504}", "{'id': 3, 'age': 136962888256576}"]


default_value parameter

In [3]:
# Start from a clean slate so this cell can be re-run: `sample.db` persists
# between kernel sessions, and an existing collection with a different schema
# (or rows left over from a previous run) would otherwise get in the way.
if client.has_collection(collection_name="user_profiles_default"):
    client.drop_collection(collection_name="user_profiles_default")

# Schema whose scalar fields declare a `default_value`.
# NOTE: the schema option is `enable_dynamic_field`; the previously used
# `enable_dynamic_schema` is not a recognised option and was silently ignored.
schema = client.create_schema(
    auto_id=False,
    enable_dynamic_field=True,
)

schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=5)
schema.add_field(field_name="age", datatype=DataType.INT64, default_value=18)
schema.add_field(field_name="status", datatype=DataType.VARCHAR, default_value="active", max_length=10)

# Index on the vector field.
index_params = client.prepare_index_params()
index_params.add_index(
    field_name="vector",
    index_type="IVF_FLAT",
    metric_type="L2",
    params={"nlist": 128},
)

client.create_collection(
    collection_name="user_profiles_default",
    schema=schema,
    index_params=index_params,
)


In [None]:
# Four rows, one for each way the defaulted fields can be supplied.
data = [
    # both `age` and `status` given
    dict(id=1, vector=[0.1, 0.2, 0.0, 0.0, 0.128], age=30, status="premium"),
    # both fields left out
    dict(id=2, vector=[0.2, 0.3, 0.0, 0.0, 0.129]),
    # `status` passed as None
    dict(id=3, vector=[0.3, 0.4, 0.0, 0.0, 0.130], age=25, status=None),
    # `age` passed as None
    dict(id=4, vector=[0.4, 0.5, 0.0, 0.0, 0.131], age=None, status="inactive"),
]

client.insert(data=data, collection_name="user_profiles_default")