In [None]:
import json
from typing import List, Dict, Any
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import lancedb
import openai
import groq
from dotenv import load_dotenv
import re
from google import genai
from google.genai import types


In [None]:
# Inputs: vector-store identifiers and retrieval size limits
LANCE_DB_NAME = "Vector_DB"        # LanceDB database name/path
LANCEDB_TABLE_NAME = "tech_ref"    # table inside that database
TOP_K_PER_QUERY = 5                # presumably the number of hits kept per individual search
MAX_COMBINED_RESULTS = 20          # presumably the cap on the merged result set

In [None]:
# Sentence-embedding models, loaded once and shared by the cells below:
#   Embedder_1 (MiniLM) scores query/passage similarity in re_rank_text.
#   Embedder_2 (MPNet) encodes the query for the vector search.
# NOTE(review): Embedder_2 presumably has to be the model that produced the vectors
# stored in the table -- confirm against the ingestion notebook.
Embedder_1 = SentenceTransformer ("sentence-transformers/all-MiniLM-L6-v2")
Embedder_2 = SentenceTransformer ("sentence-transformers/all-mpnet-base-v2")


# Load variables from a .env file into the environment, then create the LLM clients.
# Both clients are built without explicit credentials; presumably they pick up their
# API keys from the environment -- confirm the expected variable names.
load_dotenv ()
Retrieval_Client = groq.Groq ()
Gen_Client = genai.Client()

#### Utilities

**Query Keywords**  
> Function to identify keywords from the query  
> These keywords help to retrieve information from the knowledge repo by keyword match  
> This widens the scope of the search in the repo

In [None]:
# Identify Keywords from Query

def query_keywords(query: str, n_keywords: int = 3) -> List[str]:
    """
    Ask the retrieval LLM for the keywords that capture the crux of a query.

    Args:
        query: The user query text.
        n_keywords: Maximum number of keywords to request and return.

    Returns:
        List[str]: Up to ``n_keywords`` non-empty keyword strings, in the order
        the model returned them (fewer if the model found fewer).

    Raises:
        ValueError: If the reply contains no parseable JSON object
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        KeyError: If the JSON object has no "keywords" key.
    """
    # Every instruction ends with "\n"; without it adjacent literals fuse into
    # a single garbled sentence in the prompt.
    prompt = (
        'You are given a user query. With that, produce:\n'
        f'{n_keywords} Individual Keywords that capture the crux of the query ("keywords").\n'
        'If there are fewer keywords than needed, Provide **ONLY** what is present. Dont Cookup\n'
        'Return JSON with keys: "keywords" (list of strings).\n'
        'User query: ' + query
    )

    messages = [{"role": "user", "content": prompt}]
    completion = Retrieval_Client.chat.completions.create(
        messages=messages,
        model="llama-3.3-70b-versatile",
        # model="openai/gpt-oss-120b",
        stop=None,
    )

    # The content may be None (e.g. an empty completion); treat that as an empty reply.
    reply = completion.choices[0].message.content or ""
    return _parse_keywords(reply, n_keywords)


def _parse_keywords(reply: str, n_keywords: int) -> List[str]:
    """Extract the "keywords" list from an LLM reply expected to contain one JSON object."""
    # Models often wrap JSON in Markdown fences or add prose around it, so parse
    # the outermost {...} span instead of assuming the reply is pure JSON.
    start = reply.find("{")
    end = reply.rfind("}")
    if start == -1 or end < start:
        raise ValueError(f"No JSON object found in LLM reply: {reply!r}")
    data = json.loads(reply[start:end + 1])

    keywords = data["keywords"] or []
    if isinstance(keywords, str):
        # A single keyword returned as a bare string rather than a list.
        keywords = [keywords]

    # Drop blank entries (an empty search term is useless) and never return
    # more keywords than the caller asked for.
    cleaned = [str(word).strip() for word in keywords if str(word).strip()]
    return cleaned[:max(n_keywords, 0)]


**Re Ranker**  
> Function to re-rank a set of texts based on semantic similarity with the query string  
> Since keyword search may return content that merely matches the text, re-ranking is needed

In [None]:
def re_rank_text(query: str, ref_strings: List[str]) -> List[str]:
    """
    Order reference texts from most to least semantically similar to a query.

    Both the query and the references are embedded with ``Embedder_1`` and
    compared by cosine similarity.

    Args:
        query: The query text.
        ref_strings: Candidate texts to order.

    Returns:
        List[str]: The same texts, highest similarity first. An empty input
        yields an empty list.
    """
    # Nothing to rank
    if not ref_strings:
        return []

    encoder = Embedder_1

    # Embed the query and all candidates
    query_vec = encoder.encode(query, normalize_embeddings=True)
    candidate_vecs = encoder.encode(ref_strings, normalize_embeddings=True)

    # cos_sim returns a matrix; with a single query, row 0 holds its scores against every candidate
    scores = util.cos_sim(query_vec, candidate_vecs)[0]

    # Candidate positions, best score first
    order = scores.argsort(descending=True)
    return [ref_strings[position] for position in order]

**DB Connect**

In [None]:
# Connect to the existing LanceDB vector store.
# The database and table names come from the constants declared in the inputs cell
# so they are defined in exactly one place.
DB = lancedb.connect (LANCE_DB_NAME)

# Open the existing table (nothing is created or inserted here) and show its schema
table = DB.open_table (LANCEDB_TABLE_NAME)
print (table.schema)

**Text Indexing**  
Build an index on the text field. This allows keyword or full-text search to be run in the DB

In [None]:
# Build a full-text-search index on the "text" column so that string queries can be
# matched by keyword. replace=True is passed so that re-running this cell overwrites
# an index that already exists.
table.create_fts_index("text", replace=True)
# Print the table schema again after indexing
print (table.schema)

#### Augmented Generation 
> With the context information fetched from the knowledge repo, the original query is sent to the LLM to provide an answer  
> The LLM answers from within the context provided

**Query Mechanism**  
Pass the original query through the transformation and expansion pipeline  
Then consolidate the results before they are used for retrieval

In [None]:
# Query = "How various industries taking advantage of AI?"
# Query = "What is Private Cloud? Is it viable?"
# Query = "How big is the IoT industry in terms of business volume?"
# Query = "Hybrid CLoud and Edge are same?"
Query = "How new trends of AI challenging Cloud Security?"

# 1) Semantic search: embed the query and keep only hits whose cosine distance
#    is below 0.6
Query_Vector = Embedder_2.encode (Query).tolist ()
Results = (
    table.search(Query_Vector)
    .distance_type("cosine")
    .distance_range(upper_bound=0.6)
    .limit(TOP_K_PER_QUERY)
    .to_list()
)
Text_List = [r['text'] for r in Results]
Context = list(Text_List)

# 2) Identify keywords from the query
Keywords = query_keywords (Query)

# 3) Full-text search for every keyword, then for the query string itself.
#    query_type="fts" states the intent explicitly instead of relying on the
#    "auto" resolution of string queries.
for fts_query in [*Keywords, Query]:
    Results = table.search(fts_query, query_type="fts").limit(TOP_K_PER_QUERY).to_list()
    Text_List = [r['text'] for r in Results]
    Context.extend(Text_List)

# Remove duplicates while keeping first-seen order (a set would make the order
# vary between runs)
Context = list(dict.fromkeys(Context))
print (len(Context))


**Generation**  
With the context built from both meaning-based and word-based search, re-ranking is necessary  
Once re-ranked, take the top_k chunks and provide them as context for generation

In [None]:
# Order the collected chunks by semantic similarity to the original Query and keep the first ten
Re_Rank_Context = re_rank_text (Query, Context)[:10]

# System prompt: answer strictly from the supplied context, or say the context is insufficient
Instruction = """You are given context information and a user query. You have to provide detailed answer to user query based on information provided in context.
                Provide an informative answer to the user query **BASED** on the context.
                If sufficient details are not in context, respond as "No Sufficient Details"
            """

# The context list and the query go in as two separate content parts
response = Gen_Client.models.generate_content(
    model="gemini-2.0-flash",
    contents=[f"Context : \n{Re_Rank_Context}", f"User Query : \n{Query}"],
    config=types.GenerateContentConfig(
        system_instruction=Instruction,
        # temperature=0.0
    ),
)

print(response.text)


In [None]:
# Re-structure the output: ask for the answer plus a summary of the context
# (as justification), returned as a JSON object with keys "Answer" and "Context"
Instruction = """You are given context information and a user query. You have to provide detailed answer ("Answer") to user query based on information provided in context.
                Provide an informative answer to the user query **BASED** on the context.
                If sufficient details are not in context, respond as "No Sufficient Details"
                Also you will have to provide a summary of the context in 500 words ("Context"), as a justification to your answer
                Respond text in JSON format with keys : {"Answer" : string, "Context" : Summary string}
            """

response = Gen_Client.models.generate_content(
                model="gemini-2.0-flash",
                config =types.GenerateContentConfig(
                            system_instruction=Instruction,
                            # The prompt alone does not guarantee parseable output;
                            # requesting the JSON MIME type makes the model return
                            # a bare JSON document instead of fenced or free text.
                            response_mime_type="application/json",
                            # temperature=0.0
                            ),
                contents = ["Context : \n"+str(Re_Rank_Context), "User Query : \n"+Query]
)

print(response.text)

**More details**  
Variation of response to include summary

In [None]:
# Same grounded-answer task, sent to a different LLM through the Groq client
Instruction = """You are given context information and a user query. You have to provide detailed answer to user query based on information provided in context.
                Provide an informative answer to the user query **BASED** on the context. Provide as Marked Down Text
                If sufficient details are not in context, respond as "No Sufficient Details"
            """

# System message carries the instruction; the user message carries context + query
messages = [
    dict(role="system", content=Instruction),
    dict(role="user", content=f"Context : \n{Re_Rank_Context}\n\nUser Query : \n{Query}"),
]

completion = Retrieval_Client.chat.completions.create(
    # model="llama-3.3-70b-versatile",
    model="openai/gpt-oss-120b",
    messages=messages,
    stop=None,
)

print(completion.choices[0].message.content)