### Qdrant Client

In [10]:
# # --- 2. Initialize Qdrant Client and Create Collection ---
from qdrant_client import QdrantClient, models
# using qdrant docker container

client = QdrantClient(url="http://localhost:6333")
collection_name = "hybrid_search_collection"


# USE ONLY IF RUNNING FOR THE FIRST TIME

print(f"\nCreating Qdrant collection: '{collection_name}'")
client.recreate_collection(
    collection_name=collection_name,
    vectors_config={
        "dense_vector": models.VectorParams(size=1024, distance=models.Distance.COSINE)
    },
    sparse_vectors_config={
        "sparse_vector": models.SparseVectorParams(
            index=models.SparseIndexParams(on_disk=False)
        )
    }
)
print("Collection created successfully.")



Creating Qdrant collection: 'hybrid_search_collection'


  client.recreate_collection(


Collection created successfully.


### Data

In [8]:
# data 
import os
docs= []
dates = []
path = "data/Object_casedocs/"
for file in os.listdir(path):
    if file.endswith(".txt"):
        # store the file contents in docs
        data = open(path + file).read()
        docs.append(data)
        # put 4th line of the file in dates
        dates.append(data.split("\n")[3])

print("Total number of documents: ", len(docs))


print("Removing documents without dates")
months = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]

# if any date doesnt contain any of the month, remove that entry from dates and docs

for id, date in enumerate(dates):
    # replace month with number in dates and final format is year-month-day
    if any(month in date for month in months):
        continue
    else:
        docs.pop(id)
        dates.pop(id)

print("Total number of documents: ", len(docs))
#convert dates into UTC timestamps example data '26 September 1973'

print("Converting dates to UTC timestamps")
# convert months to number 
months_to_number = {"January": 1, "February": 2, "March": 3, "April": 4, "May": 5, "June": 6, "July": 7, "August": 8, "September": 9, "October": 10, "November": 11, "December": 12}
for id, date in enumerate(dates):
    # replace month with number in dates and final format is year-month-day
    date_split = date.strip().replace("  ",' ').split(" ")
    date_split[1] = str(months_to_number[date_split[1]])
    dates[id] = date_split[2] + "-" + date_split[1] + "-" + date_split[0]
print("Successfully converted dates to UTC timestamps")
# print(dates)

# sample 100 docs and dates
docs = docs[:100]
dates = dates[:100]
print("Sampling only first 100 documents from the clean docs for this experiment")

# get titles
titles=[]
for doc in docs:
    title = doc.split("\n")[0]
    titles.append(title)
titles[:5]
print("Stored titles")

# get metadata
metadata = []
for id, title in enumerate(titles):
    metadata.append({"case_id": "C-" + str(id+1), "date": dates[id]})

print("Stored metadata")

Total number of documents:  2914
Removing documents without dates
Total number of documents:  2904
Converting dates to UTC timestamps
Successfully converted dates to UTC timestamps
Sampling only first 100 documents from the clean docs for this experiment
Stored titles
Stored metadata


### Ingestion

#### Embedders

In [11]:
import torch
from qdrant_client import models
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForMaskedLM, AutoTokenizer

# --- 1. Initialize Open Source Models ---

print("Initializing models...")
# a) Dense Vector Model (384 dimensions)
dense_model = SentenceTransformer('BAAI/bge-large-en-v1.5', device='cuda' if torch.cuda.is_available() else 'cpu')
DENSE_VECTOR_SIZE = dense_model.get_sentence_embedding_dimension()
print(f"Dense model loaded. Vector size: {DENSE_VECTOR_SIZE}")

# b) Sparse Vector Model (SPLADE)
sparse_model_id = 'naver/splade-cocondenser-ensembledistil'
sparse_tokenizer = AutoTokenizer.from_pretrained(sparse_model_id)
sparse_model = AutoModelForMaskedLM.from_pretrained(sparse_model_id)
if torch.cuda.is_available():
    sparse_model.to('cuda')
sparse_model.eval()
print("Sparse model (SPLADE) loaded.")

  from .autonotebook import tqdm as notebook_tqdm


Initializing models...
Dense model loaded. Vector size: 1024


BertForMaskedLM has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.
  - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).
  - If you are not the owner of the model architecture class, please contact the model code owner to update it.


Sparse model (SPLADE) loaded.


In [12]:
def generate_splade_sparse_vector(text: str) -> models.SparseVector:
    """Generates a SPLADE sparse vector for a given text."""
    tokens = sparse_tokenizer(text, return_tensors='pt', truncation=True, padding=True) # Add truncation/padding
    if torch.cuda.is_available():
        tokens = {k: v.to('cuda') for k, v in tokens.items()}

    with torch.no_grad():
        logits = sparse_model(**tokens).logits

    # Apply ReLU and log, then perform max pooling over the token dimension (dim=1)
    # This reduces the (batch_size, sequence_length, vocab_size) to (batch_size, vocab_size)
    # For a single text, it becomes (vocab_size,) after squeeze()
    vec = torch.log(1 + torch.relu(logits)).max(dim=1).values.squeeze()

    # Ensure vec is 1D, which it should be after the above steps for a single input text
    if vec.dim() > 1:
        raise ValueError(f"Expected a 1D tensor for vec, but got {vec.shape}")

    # Get indices and values of non-zero elements
    # .nonzero() on a 1D tensor returns a 2D tensor of shape (num_non_zeros, 1)
    # .squeeze() will correctly turn (num_non_zeros, 1) into (num_non_zeros,)
    non_zero_indices = vec.nonzero().squeeze(dim=-1) # Squeeze last dim explicitly

    # Handle the case where squeeze might return a scalar if only one non-zero
    if non_zero_indices.dim() == 0: # If it's a scalar tensor
        indices = [non_zero_indices.item()]
        values = [vec[non_zero_indices].item()]
    else:
        indices = non_zero_indices.cpu().tolist()
        values = vec[non_zero_indices].cpu().tolist()

    return models.SparseVector(indices=indices, values=values)

In [None]:
#example
generate_splade_sparse_vector('legal documents')

SparseVector(indices=[2192, 2231, 2375, 2457, 2576, 2592, 2726, 3235, 3423, 3648, 3661, 4205, 4482, 4981, 5074, 5160, 5371, 5416, 5491, 5523, 6206, 6254, 6426, 6764, 6796, 7010, 7063, 7099, 7450, 7816, 8170, 8744, 9385, 9894, 11091, 12653, 15359, 18001, 18777], values=[0.038346972316503525, 0.2988216280937195, 1.2188657522201538, 0.04518304765224457, 0.09444350004196167, 0.2894461452960968, 0.12064705044031143, 0.07816712558269501, 2.5761172771453857, 0.4955641031265259, 0.322893887758255, 0.05144835636019707, 0.23672449588775635, 1.5413520336151123, 0.013036026619374752, 1.6230740547180176, 0.49325647950172424, 0.09534130245447159, 2.60306715965271, 0.7195536494255066, 0.1041899025440216, 2.2376840114593506, 0.23321685194969177, 0.717644453048706, 0.18227262794971466, 0.13698546588420868, 0.24635086953639984, 0.06411391496658325, 0.20852689445018768, 0.36267027258872986, 0.3207723796367645, 0.0248417928814888, 0.09357268363237381, 0.041070833802223206, 0.15809932351112366, 0.247266963

#### Upsert Data

In [14]:
# --- 3. Prepare and Upsert Data ---
print("\nProcessing and upserting data...")
points_to_upsert = []
for i, (doc, title, meta) in enumerate(zip(docs, titles, metadata)):
    # Generate dense vector from the main document content
    dense_vector = dense_model.encode(doc).tolist()
   
    # Generate sparse vector from the title for keyword matching
    sparse_vector = generate_splade_sparse_vector(title)
   
    # Create the point with named vectors and payload
    points_to_upsert.append(
        models.PointStruct(
            id=i + 1,
            vector={
                "dense_vector": dense_vector,
                "sparse_vector": sparse_vector
            },
            payload=meta
        )
    )


client.upsert(
    collection_name=collection_name,
    points=points_to_upsert,
    wait=True
)
print(f"Upserted {len(points_to_upsert)} points into the collection.")


Processing and upserting data...
Upserted 100 points into the collection.


### Hybrid Search (Dense + Sparse)

In [15]:
def search(query, date_filter):
    """
    Run this tool to do hybrid search for a given query.
    Returns the top 3 relevant documents.
    Args:
        query (str): The query to search for.
        date_filter (dict): Optional filter for date range. {start: "1973-02-08T10:49:00Z", end: "2024-01-31 10:14:31Z"} default end date is 2025-07-26 and start date is 1901-01-01
    Returns:
        Context: The context based on the query
    """
    print(f"\nPerforming hybrid search for query: '{query}'")
    
    if not isinstance(date_filter, dict):
        date_filter = {}
    date_filter_structured = {
            "must": {
                "key": "date",
                "range": {
                    "gt": date_filter.get("start", "1901-01-01T00:00:00Z"),
                    "gte": None,
                    "lt": None,
                    "lte": date_filter.get("end", "2025-07-26T00:00:00Z")
                }
            }
        }
    
            

    # a) Generate vectors for the query
    query_dense_vector = dense_model.encode(query).tolist()
    query_sparse_vector = generate_splade_sparse_vector(query)

    # b) Create two separate search requests
    dense_request = models.SearchRequest(
        vector={
            "name": "dense_vector",
            "vector": query_dense_vector
        },
        limit=2,
        with_payload=True,
        filter=date_filter_structured,
    )

    sparse_request = models.SearchRequest(
        vector={
            "name": "sparse_vector",
            "vector": query_sparse_vector
        },
        limit=2,
        with_payload=True,
        filter=date_filter_structured,
    )

    # c) Perform the batch search
    results = client.search_batch(
        collection_name=collection_name,
        requests=[dense_request, sparse_request]
    )

    dense_results = results[0]
    sparse_results = results[1]

    print("\n--- Dense Search Results (Semantic) ---")
    for hit in dense_results:
        print(f"ID: {hit.id}, Score: {hit.score:.4f}, Payload: {hit.payload}")

    print("\n--- Sparse Search Results (Keyword) ---")
    for hit in sparse_results:
        print(f"ID: {hit.id}, Score: {hit.score:.4f}, Payload: {hit.payload}")

    # d) Fuse the results using Reciprocal Rank Fusion (RRF)
    fused_results = reciprocal_rank_fusion([dense_results, sparse_results])

    print("\n--- Fused Hybrid Search Results (RRF) ---")
    for doc_id, score in fused_results:
        original_doc = client.retrieve(
            collection_name=collection_name,
            ids=[doc_id]
        )
        print(f"ID: {doc_id}, Fused Score: {score:.4f}, Payload: {original_doc[0].payload}")
    
    # return doc from docs list using index as doc_id as Documnent 1 : docs[0] \n\n Document 2 : docs[1]

    context = """"""
    count = 0
    for doc_id, score in fused_results:
        context = context + "Document " + str(count+1) + ": " + str(docs[doc_id-1]) + "\n\n"
        count = count + 1
    return context
    

def reciprocal_rank_fusion(search_results_list, k=60):
    fused_scores = {}
    for results in search_results_list:
        for rank, hit in enumerate(results):
            doc_id = hit.id
            if doc_id not in fused_scores:
                fused_scores[doc_id] = 0
            fused_scores[doc_id] += 1 / (k + rank)
    reranked_results = sorted(fused_scores.items(), key=lambda item: item[1], reverse=True)
    return reranked_results


In [16]:
# Run the search
search("cases vs government of india", date_filter={"start": "1973-02-08T10:49:00Z", "end": "2024-01-31 10:14:31Z"})



Performing hybrid search for query: 'cases vs government of india'

--- Dense Search Results (Semantic) ---
ID: 57, Score: 0.7304, Payload: {'case_id': 'C-57', 'date': '2004-4-12'}
ID: 90, Score: 0.7122, Payload: {'case_id': 'C-90', 'date': '1987-9-18'}

--- Sparse Search Results (Keyword) ---
ID: 13, Score: 15.3612, Payload: {'case_id': 'C-13', 'date': '1994-10-7'}
ID: 57, Score: 15.1042, Payload: {'case_id': 'C-57', 'date': '2004-4-12'}

--- Fused Hybrid Search Results (RRF) ---
ID: 57, Fused Score: 0.0331, Payload: {'case_id': 'C-57', 'date': '2004-4-12'}
ID: 13, Fused Score: 0.0167, Payload: {'case_id': 'C-13', 'date': '1994-10-7'}
ID: 90, Fused Score: 0.0164, Payload: {'case_id': 'C-90', 'date': '1987-9-18'}


  results = client.search_batch(




### Hybrid Search Agent

In [None]:
# agent to search and retrieve relevant documents
# give the results obtained via hybrid search to the agent and get the response, the agent will decide the params for the next query

from llama_index.llms.groq import Groq
# from llama_index.llms.openrouter import OpenRouter

class Generators:
    def __init__(self, model="llama-3.3-70b-versatile"):
        """
        Initializes the Generators class with a specified language model.

        Args:
            model (str): The name of the model to use. Defaults to "llama-3.3-70b-versatile".
        """
        self.llm = Groq(model=model, api_key="gsk_***", temperature=0.1)
        # self.llm = OpenRouter(
        #                     api_key="sk-or-v1-*****",
        #                     max_tokens=256,
        #                     context_window=4096,
        #                     model="google/gemini-2.0-flash-exp:free",
        #                 )

    def get_llm(self):
        """
        Returns the currently initialized language model (LLM) instance.

        :return: The language model instance used by the Generators class.
        """
        return self.llm

In [92]:
from llama_index.core.agent import FunctionCallingAgent
from llama_index.core.tools import FunctionTool
search_tool = FunctionTool.from_defaults(fn=search)
class AgentController:
    def __init__(self):        
        self.llm = Generators().get_llm()
        self.system_prompt = """
                                You are a legal agentic AI assistant.
                                Your task is to answer questions about legal documents.
                                The documents are stored in a Qdrant vector database. 
                                You will use hybrid search to find relevant documents and then use the retrieved documents to answer the questions.
                                Answer using the documents and try to find the answer in the documents.
                                you need to execute the search tool to get the relevant documents. Don't make mispellings.
                                In any circumstances do no return half baked function calls to the user, if you cannot invoke the function, try again.
                                dont return the function call, only the response
                                give the 
                                """
        self.agent = self.get_agent()

    def get_agent(self):
        agent = FunctionCallingAgent.from_tools([search_tool],
                                        llm=self.llm,verbose=True,
                                        system_prompt=self.system_prompt)
        return agent
    
    def chat(self, query: str):
        response_obj = self.agent.chat(query)
        return response_obj.response

### Results

In [93]:
agent = AgentController()
agent.chat("NK Prasada vs government of india, dated 12 April 2004")



This implementation will be removed in a v0.13.0.

See the docs for more information on updated usage: https://docs.llamaindex.ai/en/stable/understanding/agent/)
  return cls(

This implementation will be removed in a v0.13.0.

See the docs for more information on updated agent usage: https://docs.llamaindex.ai/en/stable/understanding/agent/)
  return old_new1(cls, *args, **kwargs)


> Running step fdc644d7-1b87-491c-b5ce-93c311c3ad17. Step input: NK Prasada vs government of india, dated 12 April 2004
Added user message to memory: NK Prasada vs government of india, dated 12 April 2004
=== Calling Function ===
Calling function: search with args: {"date_filter": {"end": "2004-12-31T23:59:59Z", "start": "2004-01-01T00:00:00Z"}, "query": "NK Prasada vs government of india"}

Performing hybrid search for query: 'NK Prasada vs government of india'


  results = client.search_batch(



--- Dense Search Results (Semantic) ---
ID: 57, Score: 0.8174, Payload: {'case_id': 'C-57', 'date': '2004-4-12'}
ID: 20, Score: 0.6749, Payload: {'case_id': 'C-20', 'date': '2004-4-19'}

--- Sparse Search Results (Keyword) ---
ID: 57, Score: 20.2186, Payload: {'case_id': 'C-57', 'date': '2004-4-12'}
ID: 20, Score: 5.4065, Payload: {'case_id': 'C-20', 'date': '2004-4-19'}

--- Fused Hybrid Search Results (RRF) ---
ID: 57, Fused Score: 0.0333, Payload: {'case_id': 'C-57', 'date': '2004-4-12'}
ID: 20, Fused Score: 0.0328, Payload: {'case_id': 'C-20', 'date': '2004-4-19'}
=== Function Output ===
Document 1: N.K. Prasada v Government Of India And Ors.
Supreme Court of India

12 April 2004
Appeal (civil) 3137 of 1999
The Judgment was delivered by: S. B. Sinha, J.
1.  The appellant herein was respondent No. 8 in one of the public interest litigations being No. 6240 of 1997 which was disposed of along with another public interest litigation being No. 5717 of 1997 and Contempt Case No. 779 of 

'The case of N.K. Prasada vs Government of India, dated 12 April 2004, is a Supreme Court of India judgment. The appellant, N.K. Prasada, was a respondent in a public interest litigation (PIL) filed in the Andhra Pradesh High Court. The PIL was filed by one B. Kistaiah, who alleged irregularities in the Customs and Central Excise department. The appellant was impleaded as a respondent in the PIL and was accused of engineering the filing of the PIL to avoid an order of transfer. \n\nThe High Court found that the appellant was the "kingpin" behind the filing of the PIL and had abused the process of the court. The court imposed a cost of Rs. 20,000 on the appellant. The appellant challenged the order in the Supreme Court, arguing that he was not given an opportunity to be heard and that the report of the Central Bureau of Investigation (CBI) was relied upon without considering his objections.\n\nThe Supreme Court dismissed the appeal, holding that the High Court had rightly found that the