# Upload sample data into Milvus.
# Prerequisites (run once; %pip installs into this kernel's environment):
# %pip install -U pymilvus
# %pip install "pymilvus[model]"

In [1]:
from pymilvus import MilvusClient, model,connections, db

In [2]:
# Connection settings for the Milvus server. The URI is assembled from the host
# and port so the server address is written down in exactly one place.
milHost="192.168.1.44"
milPort=19530
milURI=f"http://{milHost}:{milPort}"
milDBname="milvus_demo"
# Vector size passed to create_collection; it has to equal the output size of
# the embedding model used to encode the documents.
modelDimention=768

In [3]:
# Create the database only once: uncomment and run the two lines below on first use.
#conn = connections.connect(host="192.168.1.44", port=19530)
#database = db.create_database("milvus_demo")

In [5]:
# Open a client against the configured server URI and database.
# NOTE(review): `overwrite` does not appear to be a documented MilvusClient
# constructor parameter in recent pymilvus releases — confirm it has an effect.
client = MilvusClient(
    uri=milURI,
    db_name=milDBname,
    overwrite=True,
)

In [6]:
# Create a collection (something like a "table" in an RDBMS), starting from a clean slate.
# The name lives in one variable so the check, drop and create calls cannot drift apart.
collectionName = "demo_collection"
if client.has_collection(collection_name=collectionName):
    # Drop any leftover collection so re-running this cell always recreates it.
    client.drop_collection(collection_name=collectionName)
client.create_collection(
    collection_name=collectionName,
    dimension=modelDimention,  # 768 dimensions; must equal the output size of the embedding model in use
)

In [7]:
# List the collections visible to this client; the one created above should be shown.
client.list_collections()

['demo_collection']

In [8]:
# Default embedding function from pymilvus' `model` subpackage; it is used
# below to encode the sample documents into vectors.
embedding_fn = model.DefaultEmbeddingFunction()

In [9]:
# Sample documents: the raw text strings that get embedded and stored,
# and that later searches run against.
docs = [
    "Artificial intelligence was founded as an academic discipline in 1956.",
    "Alan Turing was the first person to conduct substantial research in AI.",
    "Born in Maida Vale, London, Turing was raised in southern England.",
]


In [10]:
# Embed every sample document.
vectors = embedding_fn.encode_documents(docs)
# Fail fast if the model's output size differs from the dimension the collection
# was created with, rather than discovering the mismatch at a later step.
assert embedding_fn.dim == modelDimention, (
    f"Embedding dim {embedding_fn.dim} does not match collection dimension {modelDimention}"
)
# The output vector has 768 dimensions, matching the collection that we just created.
print("Dim:", embedding_fn.dim, vectors[0].shape)  # Dim: 768 (768,)

Dim: 768 (768,)


In [11]:
# Build one entity per embedded document: an integer id, the embedding vector,
# the original text, and a constant "subject" label that is used later to
# demonstrate metadata filtering.
data = [
    {
        "id": idx,
        "vector": vec,
        "text": docs[idx],
        "subject": "history",
    }
    for idx, vec in enumerate(vectors)
]

In [12]:
# Sanity check: report how many entities were built and which fields the first one carries.
print("Data has", len(data), "entities, each with fields: ", data[0].keys())

Data has 3 entities, each with fields:  dict_keys(['id', 'vector', 'text', 'subject'])


In [13]:
# Sanity check: length of the first entity's vector, which should equal the collection dimension.
print("Vector dim:", len(data[0]["vector"]))

Vector dim: 768
