In [None]:
# Milvus on Zilliz Cloud
# This notebook shows how to connect directly to a cloud-hosted Milvus database (Zilliz Cloud) and walks through:
#
# - Connection & authentication
# - Collection inspection
# - CRUD operations
# - Vector similarity search
# - Visual exploration of embeddings (2D)
#
# It is designed for teaching Milvus concepts and does not cover local Docker setups.

# 1 Install & Import Dependencies
# %pip (rather than !pip) installs the Milvus Python client into the environment of the running kernel.

%pip install pymilvus

In [None]:
%pip install numpy matplotlib

In [None]:
#2 Connect to Zilliz Cloud (Milvus)
# Reads the cluster endpoint and API key from ../.env.local and opens the
# "default" pymilvus connection that every later cell relies on.
# (For a local Docker standalone Milvus you would instead call
#  connections.connect("default", host="127.0.0.1", port="19530").)

import os
from dotenv import load_dotenv
from pymilvus import connections

# override=True lets values in the env file win over variables already set in the shell.
load_dotenv(override=True, dotenv_path="../.env.local")

milvus_uri = os.getenv("MILVUS_URI")
milvus_token = os.getenv("MILVUS_API_KEY")

# Fail fast with a readable message instead of handing None to connect(),
# which happens when the env file is missing or a variable is misspelled.
missing_vars = [
    var_name
    for var_name, var_value in (("MILVUS_URI", milvus_uri), ("MILVUS_API_KEY", milvus_token))
    if not var_value
]
if missing_vars:
    raise RuntimeError(
        "Missing required setting(s): "
        + ", ".join(missing_vars)
        + ". Define them in ../.env.local or in the environment."
    )

connections.connect(
    alias="default",
    uri=milvus_uri,
    token=milvus_token
)

print("Connected to Milvus on Zilliz Cloud")

In [None]:
#3 Inspect Collections
# Ask the server which collections exist on the current connection and
# display the returned names as the cell output.
from pymilvus import utility

collection_names = utility.list_collections()
collection_names

In [9]:
#4 Load & Inspect a Collection
# Bind a handle to the existing "demo_collection", load it so it can be
# queried and searched, then display its schema as the cell output.

from pymilvus import Collection

collection = Collection(name="demo_collection")
collection.load()

schema = collection.schema
schema

{'auto_id': False, 'description': 'demo collection', 'fields': [{'name': 'id', 'description': 'The Primary Key', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'vector', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 4}}, {'name': 'title', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 200}}], 'enable_dynamic_field': True, 'enable_namespace': False}

In [None]:
#5 Read Data (Query)
# Scalar-filter query: fetch up to five rows whose primary key is non-negative.
# The vector field of this collection is named "vector" (see the schema shown
# by the previous cell), so that is the field requested here.

results = collection.query(
    expr="id >= 0",
    output_fields=["id", "title", "vector"],
    limit=5
)

results

In [None]:
# 6 Insert (CREATE)
# Add one new row using column-oriented data: one list per field, each
# holding one entry per row.
# NOTE(review): no id column is supplied, so this presumably relies on the
# primary key being auto-generated; confirm against collection.schema.

import numpy as np

new_vectors = [np.random.random(4).tolist()]            # vector column (1 row, 4 dims)
new_titles = ["Milvus makes vector search scalable"]    # title column (1 row)
data = [new_vectors, new_titles]                        # vector FIRST, title SECOND

collection.insert(data)
collection.flush()

In [None]:
# 7 Update (DELETE + INSERT pattern)
# This notebook emulates an update by removing the old row and inserting a
# replacement, because Milvus does not support in-place updates.

# Step 1: drop the row being replaced.
collection.delete(expr="id == 463705163763347399")
collection.flush()

# Step 2: insert the replacement, again as column-oriented data (1 row).
replacement_vectors = [[0.7000895, 0.022113776, 0.48144588, 0.23203984]]
replacement_titles = ["Deep Learning (Updated)"]
updated_data = [replacement_vectors, replacement_titles]

result = collection.insert(updated_data)
collection.flush()

# The replacement row gets its own primary key; report it.
new_id = result.primary_keys[0]
print(f"Record updated. New ID generated: {new_id}")

In [None]:
#8 Delete
# Remove one row by primary key, then query for the same key to confirm the
# deletion (an empty result list is the expected output).

collection.delete(expr="id == 463705164234735372")
collection.flush()

print("Record deleted")


# The vector field of this collection is named "vector", matching the field
# used by the search and visualization cells.
results = collection.query(
    expr="id == 463705164234735372",
    output_fields=["id", "title", "vector"],
    limit=5
)

results

In [None]:
#9 Vector Similarity Search
# Search the "vector" field with a random 4-dimensional query vector and
# print the five nearest rows together with their titles.

query_vector = np.random.random(4).tolist()
print(f"Query Vector: {query_vector}")

search_params = {"metric_type": "COSINE", "params": {"nprobe": 10}}
search_results = collection.search(
    data=[query_vector],
    anns_field="vector",
    param=search_params,
    limit=5,
    output_fields=["title"],
)

# One query vector was submitted, so the hits of interest are the first result set.
first_query_hits = search_results[0]
for hit in first_query_hits:
    print(f"id={hit.id}, score={hit.score}, title={hit.entity.get('title')}")

In [None]:

# 10 Visualizing Embeddings (PCA)
# This helps students see vector similarity: rows are projected to 2D with
# PCA and drawn as one colored group per title.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

# 1️⃣ Pull vectors + titles from Milvus
data = collection.query(
    expr="id >= 0",
    output_fields=["vector", "title"],
    limit=100
)

# A 2-component PCA needs at least two samples; stop with a clear message otherwise.
if len(data) < 2:
    raise ValueError(
        f"Need at least 2 records to project embeddings with PCA; query returned {len(data)}."
    )

vectors = np.array([d["vector"] for d in data])
titles = [d["title"] for d in data]

# 2️⃣ Reduce vectors to 2D using PCA
pca = PCA(n_components=2)
reduced = pca.fit_transform(vectors)

# 3️⃣ Color points by title
# Sorting makes the title -> color assignment identical on every run.
unique_titles = sorted(set(titles))
n_titles = len(unique_titles)
if n_titles <= 10:
    colors = plt.cm.tab10(range(n_titles))
else:
    # tab10 only has 10 entries and maps every larger integer index to its last
    # color, so sample a continuous colormap when there are more titles.
    colors = plt.cm.nipy_spectral(np.linspace(0.05, 0.95, n_titles))
color_map = dict(zip(unique_titles, colors))

fig, ax = plt.subplots(figsize=(8, 6))

# One scatter call per title: a single legend entry per group, no de-duplication needed.
title_array = np.array(titles)
for title in unique_titles:
    group_points = reduced[title_array == title]
    ax.scatter(
        group_points[:, 0],
        group_points[:, 1],
        color=[color_map[title]],
        alpha=0.7,
        label=title
    )

ax.legend(fontsize=8)

ax.set_title("Vector Visualization (PCA, Colored by Title)")
ax.set_xlabel("PC1")
ax.set_ylabel("PC2")
plt.show()