In [1]:
import os
from typing import List, Dict

from pymilvus import MilvusClient, DataType
from sentence_transformers import SentenceTransformer
from pypdf import PdfReader
from dotenv import load_dotenv
load_dotenv()

# ========= CONFIGURATION =========

# Connection settings are read from environment variables (load_dotenv() above
# is called first, so a local .env file can supply them). Hard-coding the
# values here is possible but not recommended.
MILVUS_HOST = os.getenv("MILVUS_HOST")        # gRPC host from watsonx.data Milvus service
MILVUS_PORT = os.getenv("MILVUS_PORT", "443") # gRPC port from service; a string, defaults to "443"
MILVUS_API_KEY = os.getenv("MILVUS_API_KEY")  # IBM Cloud API key (secret)

# Paths to the source PDFs (relative to the notebook's working directory)
PUBLIC_PDF_PATH = "offerings_public.pdf"
MANAGERS_PDF_PATH = "offerings_managers_only.pdf"

# Milvus collection names, one per PDF
PUBLIC_COLLECTION = "offerings_public"
MANAGERS_COLLECTION = "offerings_managers_only"

# Embedding model (384 dimensions)
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
EMBEDDING_DIM = 384  # fixed for this model; must be updated if the model changes

# NOTE: do not print MILVUS_API_KEY (or the other connection settings) here.
# Cell outputs are saved with the notebook, so a debug print would store the
# secret in the file.

  from pkg_resources import DistributionNotFound, get_distribution
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def connect_milvus() -> MilvusClient:
    """Open a TLS connection to the IBM watsonx.data Milvus service.

    Host, port and API key come from the module-level ``MILVUS_HOST``,
    ``MILVUS_PORT`` and ``MILVUS_API_KEY`` settings. The account name is read
    from the ``MILVUS_USER`` environment variable when it is set; otherwise the
    previously hard-coded account is used so existing setups keep working.

    Returns:
        A ``MilvusClient`` connected to ``https://<host>:<port>``.

    Raises:
        RuntimeError: if the host, port or API key is not configured.
    """
    if not (MILVUS_HOST and MILVUS_PORT and MILVUS_API_KEY):
        raise RuntimeError("Set MILVUS_HOST, MILVUS_PORT and MILVUS_API_KEY first.")

    # TODO: drop the personal fallback once MILVUS_USER is set everywhere;
    # an individual's account name should not live in a shared notebook.
    milvus_user = os.getenv("MILVUS_USER", "ibmlhapikey_michal.kordyzon@pl.ibm.com")

    # Credentials are passed as separate arguments instead of being spliced
    # into the URI: the user name contains '@', and an API key containing
    # URI-reserved characters would make the combined string ambiguous. It
    # also keeps the secret out of any place the URI itself gets echoed.
    return MilvusClient(
        uri=f"https://{MILVUS_HOST}:{MILVUS_PORT}",
        user=milvus_user,
        password=MILVUS_API_KEY,
        secure=True,
    )


In [3]:
# Connect & list collections

from pprint import pprint

# `client` is a notebook-level name holding the connection returned by
# connect_milvus().
client = connect_milvus()
print("Connected to IBM Milvus.")

# Sanity check: show which collections exist on the server.
print("\nCollections in Milvus:")
pprint(client.list_collections())

Connected to IBM Milvus.

Collections in Milvus:
['offerings_public', 'offerings_managers_only']


In [4]:
def debug_search_all_metrics(
    client,
    model,
    collection_name: str,
    query: str,
    top_k: int = 5,
    metrics=("COSINE", "IP", "L2"),
):
    """Run the same query under several distance metrics and print the hits.

    For each metric the search is delegated to ``_semantic_search_collection``,
    which must already be defined in the notebook when this function is called
    (it is not defined in this cell). Any exception raised for one metric is
    printed and the loop moves on to the next metric, so a single failing
    metric does not hide the results of the others.

    Args:
        client: Milvus client forwarded to the search helper.
        model: Embedding model forwarded to the search helper.
        collection_name: Name of the collection to search.
        query: Free-text query.
        top_k: Number of hits requested per metric.
        metrics: Metric type names to try, in order. Defaults to COSINE, IP
            and L2.

    Returns:
        None. Everything is written to stdout.
    """
    banner = "=" * 60

    for metric in metrics:
        print("\n" + banner)
        print(f"Metric: {metric}")
        print(banner)
        try:
            results = _semantic_search_collection(
                client=client,
                model=model,
                collection_name=collection_name,
                query=query,
                top_k=top_k,
                metric_type=metric,
            )
            shown = 0
            for shown, r in enumerate(results, 1):
                print(f"\n---- Result {shown} ----")
                print("Score      :", r["score"])
                print("ID         :", r["id"])
                print("Offering ID:", r["offering_id"])
                # Only a 200-character preview of the text is printed.
                print("Text       :", r["text"][:200], "...")
            if shown == 0:
                # Make an empty result set visible instead of printing nothing.
                print("No results.")
        except Exception as e:
            # Deliberately best-effort: report the failure and try the next metric.
            print(f"Error for metric {metric}: {e}")


In [5]:
# Open a connection and load the embedding model named in the configuration.
client = connect_milvus()
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# NOTE: debug_search_all_metrics calls `_semantic_search_collection`; that
# helper must be defined before this cell runs, otherwise every metric only
# reports a NameError.
query = "Travelflex"
debug_search_all_metrics(
    client=client,
    model=model,
    collection_name=PUBLIC_COLLECTION,  # or MANAGERS_COLLECTION
    query=query,
    top_k=5,
)


Metric: COSINE
Error for metric COSINE: name '_semantic_search_collection' is not defined

Metric: IP
Error for metric IP: name '_semantic_search_collection' is not defined

Metric: L2
Error for metric L2: name '_semantic_search_collection' is not defined
