In [4]:
!pip install --upgrade voyageai pymilvus

Defaulting to user installation because normal site-packages is not writeable


In [26]:
import os

import voyageai
from pymilvus import MilvusClient

MODEL_NAME = "voyage-law-2"  # Which model to use, please check https://docs.voyageai.com/docs/embeddings for available models
DIMENSION = 1024  # Dimension of vector embedding; must match what MODEL_NAME returns

# Connect to VoyageAI with an API key.
# Read the key from the VOYAGE_API_KEY environment variable so the secret is not
# saved inside the notebook; the placeholder string is only used as a fallback.
voyage_client = voyageai.Client(
    api_key=os.environ.get("VOYAGE_API_KEY", "YOUR_VOYAGEAI_API_KEY")
)

# Small demo corpus to be embedded and indexed.
docs = [
    "Artificial intelligence was founded as an academic discipline in 1956.",
    "Alan Turing was the first person to conduct substantial research in AI.",
    "Born in Maida Vale, London, Turing was raised in southern England.",
]

# Embed every document in a single request. truncation=False is passed through
# to the API as in the original call (no silent truncation of long inputs
# is requested).
vectors = voyage_client.embed(
    texts=docs,
    model=MODEL_NAME,
    truncation=False,
).embeddings

# Prepare data to be stored in Milvus vector database.
# Each row carries the id, the vector representation, the raw text and a label
# ("subject" in this case).
data = [
    {"id": i, "vector": vector, "text": text, "subject": "history"}
    for i, (text, vector) in enumerate(zip(docs, vectors))
]


# Connect to Milvus, all data is stored in a local file named "milvus_voyage_demo.db"
# in current directory. You can also connect to a remote Milvus server following this
# instruction: https://milvus.io/docs/install_standalone-docker.md.
milvus_client = MilvusClient("milvus_voyage_demo.db")
COLLECTION_NAME = "demo_collection"  # Milvus collection name

# Make the cell safe to re-run: drop a collection left over from a previous
# execution so the demo rows are not inserted a second time.
# NOTE: this deletes any existing data stored under COLLECTION_NAME.
if milvus_client.has_collection(collection_name=COLLECTION_NAME):
    milvus_client.drop_collection(collection_name=COLLECTION_NAME)

# Create a collection to store the vectors and text.
milvus_client.create_collection(
    collection_name=COLLECTION_NAME,
    dimension=DIMENSION,
)

# Insert all data into Milvus vector database.
# Use COLLECTION_NAME (not a repeated string literal) so the insert always
# targets the collection created above.
res = milvus_client.insert(
    collection_name=COLLECTION_NAME,
    data=data,
)

print(res["insert_count"])

3


In [36]:
queries = ["When was artificial intelligence founded?"]

# Embed the queries with the same model that was used for the documents.
query_vectors = voyage_client.embed(
    texts=queries,
    model=MODEL_NAME,
    truncation=False,
).embeddings

res = milvus_client.search(
    collection_name=COLLECTION_NAME,    # target collection
    data=query_vectors,                 # query vectors
    limit=2,                            # number of returned entities
    output_fields=["text", "subject"],  # specifies fields to be returned
)

# `res` holds one list of hits per query vector, in the same order as `queries`.
# Pair each query with its own hits; the previous nested loop printed every
# query's hits under every query header.
for q, hits in zip(queries, res):
    print('Query:', q)
    print(hits)
    print("\n")

Query: When was artificial intelligence founded?
[{'id': 0, 'distance': -0.7196217775344849, 'entity': {'text': 'Artificial intelligence was founded as an academic discipline in 1956.', 'subject': 'history'}}]
