In [None]:
import os
import json
from typing import List, Dict, Any
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
# from tqdm import tqdm
import lancedb
import openai
import groq
from dotenv import load_dotenv
import re
from google import genai
from google.genai import types


In [None]:
# Inputs: configuration values for the retrieval pipeline
# LanceDB database location (same path the connection cell connects to)
LANCE_DB_NAME = 'Vector_DB'
# Name of the table that is opened for similarity search
LANCEDB_TABLE_NAME = 'tech_ref'
# Number of rows requested from the table for each individual query
TOP_K_PER_QUERY = 5
# Presumably an upper bound on the size of the merged context -- TODO confirm where it is enforced
MAX_COMBINED_RESULTS = 20

In [None]:
# Sentence-embedding models. Embedder_2 (all-mpnet-base-v2) is the one the
# search cell uses to encode queries.
# NOTE(review): the query encoder presumably has to be the same model that
# produced the vectors stored in the table -- confirm against the indexing code.
Embedder_1 = SentenceTransformer ("sentence-transformers/all-MiniLM-L6-v2")
Embedder_2 = SentenceTransformer ("sentence-transformers/all-mpnet-base-v2")


# Load variables from a .env file, then initialise the API client objects.
# Both clients are constructed without explicit arguments, so credentials are
# presumably read from the environment -- TODO confirm the variable names.
# Retrieval_Client (Groq) serves the query-rewriting helpers and the last
# generation cell; Gen_Client (Gemini) serves the other generation cells.
load_dotenv ()
Retrieval_Client = groq.Groq ()
Gen_Client = genai.Client()

#### Utilities

**Query transformation**  
> Function to transform the query into a more precise reformulation and several paraphrases  
> This may help to retrieve information that is hidden in the details  
> Paraphrasing with the help of an LLM brings out more nuances of the query

In [None]:
# Query transformation (LLM + fallback)

def transform_query (query: str, n_paraphrases: int = 3) -> List[str]:
    """Rewrite a user query into one precise reformulation plus paraphrases.

    Asks the Groq chat model for a JSON object with the keys "precise"
    (string) and "paraphrases" (list of strings) and flattens it into a
    single list of query strings.

    Args:
        query: The original user query.
        n_paraphrases: Number of paraphrases requested in the prompt. The
            reply is neither truncated nor padded to this number.

    Returns:
        ``[precise, *paraphrases]`` (blank / non-string entries removed) when
        the reply can be parsed; otherwise ``[query]`` so that retrieval can
        still run on the original wording.

    Raises:
        Errors raised by the Groq client call itself are not caught here, so
        authentication or network problems stay visible.
    """
    prompt = (
        'You are given a user query. With that, produce:\n'
        '1) a precise reformulation suitable for content retrieval ("precise")\n'
        f'2) {n_paraphrases} concise paraphrases of the original query suitable for semantic retrieval ("paraphrases")\n'
        'Return JSON with keys: "precise" (string) and "paraphrases" (list of strings).\n'
        'User query: ' + query
    )

    completion = Retrieval_Client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama-3.3-70b-versatile",
        # model="openai/gpt-oss-120b",
        stop=None,
    )
    reply = completion.choices[0].message.content or ""

    try:
        # The model may wrap the JSON in a code fence or surround it with
        # prose; decode the first JSON object and ignore whatever follows it.
        start = reply.index ("{")
        data, _ = json.JSONDecoder ().raw_decode (reply[start:])
        precise = data["precise"]
        paraphrases = data["paraphrases"]
    except (ValueError, KeyError, TypeError):
        # No JSON object, malformed JSON, or an expected key is missing
        return [query]

    # A single paraphrase is sometimes returned as a bare string
    if not isinstance (paraphrases, list):
        paraphrases = [paraphrases]

    texts = [
        text.strip ()
        for text in [precise, *paraphrases]
        if isinstance (text, str) and text.strip ()
    ]

    return texts or [query]


**Query Expansion**  
> Function to expand the query into multiple queries with the same intent as the original query  
> This helps to retrieve information that is surfaced only by variations of the query  
> By creating variations with the help of an LLM, retrieval can reach more semantically similar areas

In [None]:
def expand_query (query: str, n_alternates: int = 3) -> List[str]:
    """Generate alternative phrasings that keep the intent of a user query.

    Asks the Groq chat model for a JSON object with the key "alternates"
    (list of strings) and returns that list.

    Args:
        query: The original user query.
        n_alternates: Number of variations requested in the prompt. The reply
            is neither truncated nor padded to this number.

    Returns:
        The alternates (blank / non-string entries removed) when the reply can
        be parsed; otherwise ``[query]`` so that retrieval can still run on
        the original wording.

    Raises:
        Errors raised by the Groq client call itself are not caught here, so
        authentication or network problems stay visible.
    """
    # The query is stated once, at the end of the prompt
    prompt = (
        f'Generate {n_alternates} diverse query variations that can expand search horizon, but preserve intent of the following user query.\n'
        'Return JSON with keys: "alternates" (list of strings).\n'
        'User query: ' + query
    )

    completion = Retrieval_Client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama-3.3-70b-versatile",
        # model="openai/gpt-oss-120b",
        stop=None,
    )
    reply = completion.choices[0].message.content or ""

    try:
        # The model may wrap the JSON in a code fence or surround it with
        # prose; decode the first JSON object and ignore whatever follows it.
        start = reply.index ("{")
        data, _ = json.JSONDecoder ().raw_decode (reply[start:])
        alternates = data["alternates"]
    except (ValueError, KeyError, TypeError):
        # No JSON object, malformed JSON, or the expected key is missing
        return [query]

    # A single alternate is sometimes returned as a bare string
    if not isinstance (alternates, list):
        alternates = [alternates]

    texts = [
        text.strip ()
        for text in alternates
        if isinstance (text, str) and text.strip ()
    ]

    return texts or [query]

In [None]:
# Connect to the existing LanceDB vector store; nothing is created here.
# The location and table name come from the inputs cell instead of being
# repeated as literals.
DB = lancedb.connect (LANCE_DB_NAME)

# Open the table that is searched below and show its schema
table = DB.open_table (LANCEDB_TABLE_NAME)
print (table.schema)

**Query Mechanism**  
Pass the original query through the transformation and expansion pipeline  
Then consolidate before being used for retrieval

In [None]:
# Example questions -- keep exactly one `Query = ...` line uncommented.
# Query = "How is the industry adapting to AI?"
# Query = "Cloud computing Vs Edge computing"
Query = "What is the timeline it took from ML to AI?"
# Query = "Tell me what was the motivation for the industry to develop Edge Technology"
# Query = "Did it rain yesterday during the business hours?"

# Rewrite the question in two ways; each helper returns a list of query strings
trans_queries = transform_query (Query)
expand_queries = expand_query (Query)

# Debug aid: uncomment to inspect the generated queries
# print ("Transformed Queries : \n")
# for t in trans_queries:
#     print (t)

# print ("Expanded Queries : \n")
# for e in expand_queries:
#     print (e)


In [None]:
# Search the vector DB with every formulated query and merge the hits into
# one de-duplicated, relevance-ordered context list.

# Hits whose cosine distance to the query exceeds this value are discarded
MAX_COSINE_DISTANCE = 0.6

# Drop repeated queries (first occurrence kept) so no search runs twice
queries = list (dict.fromkeys (trans_queries + expand_queries))

# Smallest distance seen so far for each distinct chunk text
best_distance = {}

for query in queries:

    Query_Vector = Embedder_2.encode (query).tolist ()
    Results = (
        table.search (Query_Vector)
        .distance_type ("cosine")
        .distance_range (upper_bound=MAX_COSINE_DISTANCE)
        .limit (TOP_K_PER_QUERY)
        .to_list ()
    )

    for row in Results:
        text = row['text']
        # LanceDB reports the query-to-row distance in the `_distance` column
        distance = row.get ('_distance', float ('inf'))
        if text not in best_distance or distance < best_distance[text]:
            best_distance[text] = distance

# Closest chunks first. Unlike list(set(...)), this order is the same on every
# run, because the sort is stable and dicts keep insertion order. The result
# is capped at the configured context size.
Context = sorted (best_distance, key=best_distance.get)[:MAX_COMBINED_RESULTS]
print (len(Context))

#### Augmented Generation 
> With the context information fetched from the knowledge repo, the original query is sent to the LLM to provide an answer  
> The LLM answers only from within the context provided

In [None]:
# Instruction for the LLM
Instruction = """You are given context information and a user query. You have to provide detailed answer to user query based on information provided in context.
                Provide an informative answer to the user query **BASED** on the context.
                If sufficient details are not in context, respond as "No Sufficient Details"
            """

# The retrieved chunks are joined as plain text separated by blank lines;
# str(Context) would send Python list syntax (brackets, quotes, escaped
# newlines) to the model instead of the text itself.
response = Gen_Client.models.generate_content(
                model="gemini-2.0-flash",
                config =types.GenerateContentConfig(
                            system_instruction=Instruction,
                            # temperature=0.0
                            ),
                contents = ["Context : \n"+"\n\n".join(Context), "User Query : \n"+Query]
)

print(response.text)

**More details**  
Variation of response to include summary

In [None]:
# Re-structure the output: answer plus a summary of the context, as JSON
Instruction = """You are given context information and a user query. You have to provide detailed answer ("Answer") to user query based on information provided in context.
                Provide an informative answer to the user query **BASED** on the context.
                If sufficient details are not in context, respond as "No Sufficient Details"
                Also you will have to provide a summary of the context in 300 words ("Context")
                Respond in JSON format with keys : {"Answer" : string, "Context" : Summary string}
            """

# response_mime_type asks the API for raw JSON, so the reply is not wrapped in
# a Markdown code fence or surrounded by prose.
# The retrieved chunks are joined as plain text; str(Context) would send
# Python list syntax (brackets, quotes, escaped newlines) to the model.
response = Gen_Client.models.generate_content(
                model="gemini-2.0-flash",
                config =types.GenerateContentConfig(
                            system_instruction=Instruction,
                            response_mime_type="application/json",
                            # temperature=0.0
                            ),
                contents = ["Context : \n"+"\n\n".join(Context), "User Query : \n"+Query]
)

print(response.text)

In [None]:
# Generation with a different LLM (served through the Groq client)
Instruction = """You are given context information and a user query. You have to provide detailed answer to user query based on information provided in context.
                Provide an informative answer to the user query **BASED** on the context.
                If sufficient details are not in context, respond as "No Sufficient Details"
            """

# The retrieved chunks are joined as plain text separated by blank lines;
# str(Context) would send Python list syntax (brackets, quotes, escaped
# newlines) to the model instead of the text itself.
messages=[
            {
                "role": "system",
                "content": Instruction,
            },
            {
                "role": "user",
                "content": "Context : \n"+"\n\n".join(Context)+"\n\nUser Query : \n"+Query,
            }
        ]
completion = Retrieval_Client.chat.completions.create(
    messages=messages,    
    # model="llama-3.3-70b-versatile",
    model="openai/gpt-oss-120b",
    stop=None,
)

print(completion.choices[0].message.content)