In [1]:
from pymilvus import MilvusClient, DataType

# Open a client connection to the Milvus server at http://localhost:19530.
client = MilvusClient(uri="http://localhost:19530")

# Create the "medagents" collection: 768-dimensional vectors compared by
# inner product (IP), read with strong consistency.
# NOTE(review): 768 presumably matches the output size of the encoder used
# for queries below — confirm against the model that populated the collection.
client.create_collection(
    collection_name="medagents",
    consistency_level="Strong",
    metric_type="IP",
    dimension=768,
)


# Load the MedCPT query encoder: the tokenizer, then the model weights, which
# are moved onto `device`.
# NOTE(review): `AutoModel`, `AutoTokenizer` and `device` are not imported or
# defined in the visible cells — confirm an earlier cell provides them,
# otherwise this fails with NameError on a fresh kernel.
tokenizer_q = AutoTokenizer.from_pretrained("ncbi/MedCPT-Query-Encoder")
model_q = AutoModel.from_pretrained("ncbi/MedCPT-Query-Encoder").to(device)

def medcpt_query_embedding_function(docs):
    """Embed `docs` with the MedCPT query encoder and return one vector.

    The input is tokenized (truncated to at most 512 tokens, padded, returned
    as PyTorch tensors), moved to `device`, and run through `model_q` with
    gradient tracking disabled. The hidden state at sequence position 0 is
    taken as the embedding for each input.

    Args:
        docs: Text passed straight to `tokenizer_q`. The visible caller
            passes a single string.

    Returns:
        The embedding of the FIRST input only, as a plain Python list of
        floats. If `docs` holds several texts, the remaining embeddings are
        computed but discarded.
    """
    batch = tokenizer_q(
        docs,
        max_length=512,
        truncation=True,
        padding=True,
        return_tensors='pt',
    )
    # The tokenizer output must live on the same device as the model.
    on_device = {name: tensor.to(device) for name, tensor in batch.items()}
    with torch.no_grad():
        outputs = model_q(**on_device)
        # Position 0 along the sequence axis (presumably the [CLS] token).
        first_token_states = outputs.last_hidden_state[:, 0, :]
    vectors = first_token_states.cpu().numpy()
    first_vector = vectors[0]
    return first_vector.tolist()

# Embed one clinical vignette with the MedCPT query encoder and search
# "medagents" for it: inner-product metric, at most 3 hits, each returned
# with its "text" field.
search_res = client.search(
    collection_name='medagents',
    output_fields=["text"],
    search_params={"metric_type": "IP", "params": {}},
    limit=3,
    data=[
        medcpt_query_embedding_function('A junior orthopaedic surgery resident is completing a carpal tunnel repair with the department chairman as the attending physician. During the case, the resident inadvertently cuts a flexor tendon. The tendon is repaired without complication. The attending tells the resident that the patient will do fine, and there is no need to report this minor complication that will not harm the patient, as he does not want to make the patient worry unnecessarily. He tells the resident to leave this complication out of the operative report. Which of the following is the correct next action for the resident to take?')
    ],
)

In [None]:
# Create the "my_database" database through the MilvusClient connection that
# the first cell opens. The ORM-style `db` module is not imported in the
# visible cells (and would need its own `connections.connect(...)`), so the
# client method is used instead.
# NOTE(review): requires a pymilvus release whose MilvusClient exposes
# `create_database`; the call raises if the database already exists.
# `database` holds whatever the call returns (presumably None) — confirm
# nothing downstream relies on it.
database = client.create_database(db_name="my_database")

In [None]:
# Drop "demo_collection" when it already exists, so that it is absent after
# this cell regardless of earlier runs.
# NOTE(review): the searches further down target "demo_collection", but no
# visible cell re-creates or populates it after this drop — confirm that the
# creation/insert cells exist elsewhere in the notebook.
if client.has_collection(collection_name="demo_collection"):
    client.drop_collection(collection_name="demo_collection")


In [15]:
# Unfiltered vector search on "demo_collection": at most 2 entities are
# returned (per the `limit` argument), each with its "text" and "subject"
# fields.
# NOTE(review): `query_vectors` is not defined in the visible cells, and `res`
# is overwritten by the next search before being displayed — confirm intent.
res = client.search(
    collection_name="demo_collection",
    output_fields=["text", "subject"],
    limit=2,
    data=query_vectors,
)


# Filtered vector search on "demo_collection": the query text is encoded with
# `embedding_fn`, only entities whose "subject" equals 'biology' are
# considered, and at most 2 hits come back with "text" and "subject".
# NOTE(review): `embedding_fn` is not defined in the visible cells — confirm
# an earlier cell creates it.
res = client.search(
    collection_name="demo_collection",
    filter="subject == 'biology'",
    output_fields=["text", "subject"],
    limit=2,
    data=embedding_fn.encode_queries(["tell me AI related information"]),
)

data: ["[{'id': 5, 'distance': 0.05801853910088539, 'entity': {'text': 'DDR1 is involved in cancers and fibrosis.', 'subject': 'biology'}}]"] 
