In [1]:
from pymilvus import AnnSearchRequest

In [2]:
# Free-text query; it is embedded below and used as the vector-search input.
search_text = 'Section 437 of H MC Act Award costs'

In [3]:
from langchain_community.embeddings import OllamaEmbeddings
# Embedding client configured for the Ollama model named "phi3".
# NOTE(review): presumably the same model that produced the stored `judgementVector`
# embeddings -- confirm, otherwise the search distances are not meaningful.
embeddings = OllamaEmbeddings(model="phi3")

In [4]:
# Embed the query text; the result is a flat list of floats (see the output below).
query_vector = embeddings.embed_query(search_text)
# Bare expression: displays the complete vector as the cell output.
query_vector

[-0.662993311882019,
 1.3389774560928345,
 1.608642816543579,
 2.629030704498291,
 0.4607219099998474,
 0.5898449420928955,
 -1.8966244459152222,
 1.0676032304763794,
 0.3847752511501312,
 1.2403841018676758,
 -0.31364351511001587,
 1.6640640497207642,
 -1.3513212203979492,
 0.11910887807607651,
 1.6467162370681763,
 -1.1944955587387085,
 0.8214799165725708,
 2.597777843475342,
 -0.9678321480751038,
 0.8486834168434143,
 0.0014776787720620632,
 0.3605087697505951,
 0.3246802091598511,
 3.236917495727539,
 0.8193278908729553,
 -0.14206048846244812,
 0.03140873461961746,
 -0.12274175137281418,
 0.7621447443962097,
 -0.7797073721885681,
 1.1187434196472168,
 -0.9051377773284912,
 -0.5302390456199646,
 0.6423667669296265,
 2.6664230823516846,
 -1.2604446411132812,
 1.481611967086792,
 -1.3693121671676636,
 0.7831668257713318,
 -3.82476806640625,
 0.6746876835823059,
 -2.6405746936798096,
 -0.980459988117218,
 0.9695890545845032,
 -3.9123754501342773,
 -0.2673811912536621,
 0.36769536137580

In [5]:
# Keyword arguments for the single ANN request against the judgement embeddings.
search_param_1 = dict(
    # Query vector, wrapped in a list.
    data=[query_vector],
    # Name of the vector field to search.
    anns_field="judgementVector",
    # metric_type must be identical to the one used in the collection schema.
    param=dict(metric_type="L2", params=dict(nprobe=10)),
    # Number of search results to return in this AnnSearchRequest.
    limit=10,
)

In [6]:
# Build the request object from the parameter dict; it is passed to hybrid_search below.
req1 = AnnSearchRequest(**search_param_1)

In [7]:
# hybrid_search takes a list of requests; this notebook issues only one.
reqs = [req1]

In [8]:
# Reranking strategy handed to hybrid_search.
# Alternative kept for reference (disabled): a weighted ranker, e.g.
#   from pymilvus import WeightedRanker
#   rerank = WeightedRanker(0.8)
from pymilvus import RRFRanker

# RRFRanker with its default settings.
# NOTE(review): presumably reciprocal-rank fusion, in which case the "distance" values
# printed for the results are fused rank scores rather than raw L2 distances -- confirm.
rerank = RRFRanker()

In [9]:
from pymilvus import connections, Collection

In [10]:
# Open a connection to the Milvus server (replace "localhost" with your Milvus server IP).
connections.connect(host="localhost", port="19530")

In [11]:
# Get a handle to the collection named "judgement_collection".
# NOTE(review): no schema is passed, so this presumably opens an existing collection
# rather than creating a new one -- confirm.
collection = Collection(name="judgement_collection")

In [12]:
# The collection must be loaded into memory before the hybrid search below is run.
collection.load()

In [13]:
# Run the hybrid search: `reqs` is the list of AnnSearchRequests, `rerank` is the
# reranking strategy, and `limit` caps the number of final results returned.
res = collection.hybrid_search(reqs, rerank, limit=5)

In [14]:
# Show the raw search result. "entity" is empty in the output, presumably because
# hybrid_search was called without output_fields -- confirm.
print(res)

data: ["['id: 451835641854528249, distance: 0.016393441706895828, entity: {}', 'id: 451835641853171234, distance: 0.016129031777381897, entity: {}', 'id: 451835641847719549, distance: 0.01587301678955555, entity: {}', 'id: 451835641852923597, distance: 0.015625, entity: {}', 'id: 451835641847751261, distance: 0.015384615398943424, entity: {}']"]


In [20]:
import ast
import re

# Collect the ids of the reranked hits for the (single) query.
# `res[0]` is the hit list for the first query; each hit exposes its id as `.id`,
# so the ids are read directly instead of being parsed back out of the printed
# representation (which depends on the exact repr format and breaks if it changes).
ids = [hit.id for hit in res[0]]

In [27]:
# Fetch the source file name of every hit, filtering on the "id" field.
results = collection.query(expr=f"id in {ids}", output_fields=["id", "judgementFileName"])

In [29]:
# File name of each returned row, in the order the query returned them.
file_names = [row['judgementFileName'] for row in results]

In [34]:
# Fetch every stored chunk whose file name is among the matched files
# (rebinds `results`, replacing the id lookup above).
results = collection.query(
    expr=f"judgementFileName in {file_names}",
    output_fields=["id", "judgementText", "judgementChunkId", "judgementFileName"],
)

In [35]:
# Print each returned chunk record on its own line.
for row in results:
    print(row)

{'id': 451835641847719516, 'judgementText': 'This Civil Miscellaneous Appeal has been filed by the claimants for\nenhancement of compensation awarded by the Tribunal, by modifying the\naward passed in M.C.O.P.No.80 of 2013 dated 24.10.2016, on the file of the\nSpecial District Judge, to deal with MCOP Cases, Villupuram.\n2. The appellants herein are the claimants in M.C.O.P.No.80 of 2013, on', 'judgementChunkId': 0, 'judgementFileName': '638543.pdf'}
{'id': 451835641847719517, 'judgementText': 'the file of the Special District Judge, to deal with the MCOP Cases,\nVillupuram. They have filed the above said claim petition, claiming a sum of\nRs.45,00,000/- as compensation for the death of one Bakthinarayanan, who\ndied in the road accident that took place on 23.07.2009.\n3. The facts which related to the filing of the present appeal are that on', 'judgementChunkId': 1, 'judgementFileName': '638543.pdf'}
{'id': 451835641847719518, 'judgementText': '23.07.2009 at about 8.40 hours, while th

In [36]:
import json
from collections import defaultdict
# Nested mapping: file name -> chunk id -> accumulated text of that chunk.
combined_text = defaultdict(lambda: defaultdict(str))
for row in results:
    name, chunk_no, body = row['judgementFileName'], row['judgementChunkId'], row['judgementText']
    # The trailing space keeps neighbouring chunks from running together when joined.
    combined_text[name][chunk_no] += body + " "

# One record per file, with its chunks concatenated in ascending chunk-id order.
result = [
    {
        'judgementFileName': name,
        'combinedJudgementText': ''.join(by_chunk[cid] for cid in sorted(by_chunk)).strip(),
    }
    for name, by_chunk in combined_text.items()
]

# Emit the combined documents as pretty-printed JSON.
print(json.dumps(result, indent=4))

[
    {
        "judgementFileName": "638543.pdf",
        "combinedJudgementText": "This Civil Miscellaneous Appeal has been filed by the claimants for\nenhancement of compensation awarded by the Tribunal, by modifying the\naward passed in M.C.O.P.No.80 of 2013 dated 24.10.2016, on the file of the\nSpecial District Judge, to deal with MCOP Cases, Villupuram.\n2. The appellants herein are the claimants in M.C.O.P.No.80 of 2013, on the file of the Special District Judge, to deal with the MCOP Cases,\nVillupuram. They have filed the above said claim petition, claiming a sum of\nRs.45,00,000/- as compensation for the death of one Bakthinarayanan, who\ndied in the road accident that took place on 23.07.2009.\n3. The facts which related to the filing of the present appeal are that on 23.07.2009 at about 8.40 hours, while the deceased was waiting to cross the\nservice road in his motorcycle bearing registration No. PY01 AR 4033 on the\nhttps://www.mhc.tn.gov.in/judis\n2/14\nC.M.A.No.1662 of 