# Getting Started with Milvus (Lite)
* [User Docs](https://milvus.io/docs/quickstart.md)


### Install and import dependencies



In [None]:
# !pip install -U pymilvus
!pip install -U "pymilvus[model]"

Collecting pymilvus[model]
  Downloading pymilvus-2.4.7-py3-none-any.whl.metadata (5.6 kB)
Collecting environs<=9.5.0 (from pymilvus[model])
  Downloading environs-9.5.0-py2.py3-none-any.whl.metadata (14 kB)
Collecting ujson>=2.0.0 (from pymilvus[model])
  Downloading ujson-5.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.3 kB)
Collecting milvus-lite<2.5.0,>=2.4.0 (from pymilvus[model])
  Downloading milvus_lite-2.4.10-py3-none-manylinux2014_x86_64.whl.metadata (9.0 kB)
Collecting milvus-model>=0.1.0 (from pymilvus[model])
  Downloading milvus_model-0.2.7-py3-none-any.whl.metadata (1.6 kB)
Collecting marshmallow>=3.0.0 (from environs<=9.5.0->pymilvus[model])
  Downloading marshmallow-3.22.0-py3-none-any.whl.metadata (7.2 kB)
Collecting python-dotenv (from environs<=9.5.0->pymilvus[model])
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting onnxruntime (from milvus-model>=0.1.0->pymilvus[model])
  Downloading onnxruntime-1.19.2-cp3

In [None]:
import pymilvus
from pymilvus import MilvusClient

### Open Vector Database

In [None]:
# Open a client backed by the local database file "milvus_demo2.db".
client = MilvusClient(uri="milvus_demo2.db")

DEBUG:pymilvus.milvus_client.milvus_client:Created new connection using: dbb90d7dadd04085a30b8baafb5a8c74


### Create a Collection

In [None]:
# Drop any copy of the collection left over from an earlier run so that this
# cell can be executed repeatedly.
if client.has_collection("demo_collection"):
    client.drop_collection("demo_collection")

# Only the name and the vector dimension are given here.
# 768 is the dimension of the embeddings produced later in this notebook.
client.create_collection(collection_name="demo_collection", dimension=768)

DEBUG:pymilvus.milvus_client.milvus_client:Successfully created collection: demo_collection
DEBUG:pymilvus.milvus_client.milvus_client:Successfully created an index on collection: demo_collection


### Embed text into vectors
* [Sentence Transformers](https://huggingface.co/sentence-transformers)

In [None]:
from pymilvus import model

# Default embedding function; per the original note it uses
# "paraphrase-albert-small-v2" (~50MB).
embedding_fn = model.DefaultEmbeddingFunction()

# Corpus of sentences that will be searched.
docs = [
    "Artificial intelligence was founded as an academic discipline in 1956.",
    "Alan Turing was the first person to conduct substantial research in AI.",
    "Born in Maida Vale, London, Turing was raised in southern England.",
]

# One embedding per document, in the same order as `docs`.
vectors = embedding_fn.encode_documents(docs)

# Sanity check: the embeddings should be 768-dimensional, the same dimension
# the collection was created with.
print("Dim:", embedding_fn.dim, vectors[0].shape)  # Dim: 768 (768,)

# Build one entity per document: primary key, embedding, raw text, and a
# "subject" label that the metadata-filtering demo relies on later.
data = [
    {"id": idx, "vector": vec, "text": docs[idx], "subject": "history"}
    for idx, vec in enumerate(vectors)
]

print("Data has", len(data), "entities, each with fields: ", data[0].keys())
print("Vector dim:", len(data[0]["vector"]))



Dim: 768 (768,)
Data has 3 entities, each with fields:  dict_keys(['id', 'vector', 'text', 'subject'])
Vector dim: 768


### Insert embeddings into vector database

In [None]:
# Write the prepared entities into the collection and show the returned summary.
res = client.insert(
    collection_name="demo_collection",
    data=data,
)
print(res)

{'insert_count': 3, 'ids': [0, 1, 2]}


### Vector (i.e. Semantic) Search

In [None]:
# Embed the question with the same embedding function used for the documents.
query_vectors = embedding_fn.encode_queries(["Who is Alan Turing?"])
# Without the embedding function, a random vector is enough to finish the demo
# (requires `import random`):
# query_vectors = [[random.uniform(-1, 1) for _ in range(768)]]

res = client.search(
    collection_name="demo_collection",  # collection to search in
    data=query_vectors,  # one query vector per question
    output_fields=["text", "subject"],  # fields returned with each hit
    limit=2,  # maximum number of hits to return
)
print(res)

data: ["[{'id': 2, 'distance': 0.5859944820404053, 'entity': {'text': 'Born in Maida Vale, London, Turing was raised in southern England.', 'subject': 'history'}}, {'id': 1, 'distance': 0.5118255019187927, 'entity': {'text': 'Alan Turing was the first person to conduct substantial research in AI.', 'subject': 'history'}}]"] 


### Search with Metadata Filtering

In [None]:
# Add a second batch of documents under a different subject label.
docs = [
    "Machine learning has been used for drug design.",
    "Computational synthesis with AI algorithms predicts molecular properties.",
    "DDR1 is involved in cancers and fibrosis.",
]
vectors = embedding_fn.encode_documents(docs)

# Ids continue from 3 because ids 0-2 are already used by the "history" entities.
data = [
    {"id": 3 + offset, "vector": vec, "text": docs[offset], "subject": "biology"}
    for offset, vec in enumerate(vectors)
]

client.insert(collection_name="demo_collection", data=data)

# The filter restricts hits to the "biology" subject, so "history" entities are
# excluded even when they are close to the query vector.
res = client.search(
    collection_name="demo_collection",
    data=embedding_fn.encode_queries(["tell me AI related information"]),
    output_fields=["text", "subject"],
    filter="subject == 'biology'",
    limit=2,
)
print(res)

data: ["[{'id': 4, 'distance': 0.27030572295188904, 'entity': {'text': 'Computational synthesis with AI algorithms predicts molecular properties.', 'subject': 'biology'}}, {'id': 3, 'distance': 0.1642588973045349, 'entity': {'text': 'Machine learning has been used for drug design.', 'subject': 'biology'}}]"] 


### Scalar Field Query

In [None]:
# Scalar query: fetch every entity whose "subject" field equals 'history',
# returning only the listed fields.
res = client.query(
    collection_name="demo_collection",
    filter="subject == 'history'",
    output_fields=["text", "subject"],
)

# Show the result; without this the cell produces no visible output.
print(res)

In [None]:
# Scalar query by primary key: fetch the entities with ids 0 and 2, including
# their stored vectors.
res = client.query(
    collection_name="demo_collection",
    ids=[0, 2],
    output_fields=["vector", "text", "subject"],
)

# Show a compact view of the result: the "vector" field is replaced by its
# length so the output is not flooded with the raw embedding values.
# Assumes each returned entity behaves like a dict — TODO confirm.
print(
    [
        {
            key: (f"<vector, dim={len(value)}>" if key == "vector" else value)
            for key, value in entity.items()
        }
        for entity in res
    ]
)

### Delete Entities

In [None]:
# Remove two specific entities, addressed by primary key.
res = client.delete(
    collection_name="demo_collection",
    ids=[0, 2],
)
print(res)

# Remove every entity that matches a scalar filter expression.
res = client.delete(collection_name="demo_collection", filter="subject == 'biology'")
print(res)

[0, 2]
[3, 4, 5]


### Load Pre-existing Database

In [None]:
# Close the connection explicitly before discarding the handle: `del` only
# unbinds the name and does not by itself guarantee the connection is released.
client.close()
del client

In [None]:
# Reopen the same database file that was created at the top of this notebook
# ("milvus_demo2.db"), so that "demo_collection" and its remaining entity are
# found when the notebook is run from a fresh kernel.
client = MilvusClient("milvus_demo2.db")

DEBUG:pymilvus.milvus_client.milvus_client:Created new connection using: cdd657d615e945589aae944026cc8f87


In [None]:
# Run a search through the reopened client to confirm that the data persisted.
# No `limit` is passed, so the client's default applies.
query_vectors = embedding_fn.encode_queries(["tell me AI related information"])
res = client.search(
    collection_name="demo_collection",
    data=query_vectors,
    output_fields=["text", "subject"],
)
print(res)

data: ["[{'id': 1, 'distance': 0.4428038001060486, 'entity': {'text': 'Alan Turing was the first person to conduct substantial research in AI.', 'subject': 'history'}}]"] 


### Milvus Standalone and Cluster

In [None]:
# To connect to a Milvus Standalone or Cluster deployment instead of a local
# database file, pass the server URI and a token. The values below are
# placeholders (presumably the defaults of a local deployment — confirm);
# replace them with your own and do not commit real credentials.
# client = MilvusClient(uri="http://localhost:19530", token="root:Milvus")