### Imports

In [1]:
from typing import List, Tuple, Dict

In [2]:
from config.settings import settings

### Document Loading

In [3]:
from src.ingestion.DocumentLoader import DocumentLoader

In [4]:
loader = DocumentLoader()

In [5]:
folder_name = "index"

In [6]:
files = loader.list_filenames(folder_name)
files

[METRICS] list_filenames: time=0.00s, count=1


['Attention_is_all_you_need.pdf']

In [7]:
docs = loader.load_documents(subdir=folder_name,file_names=files)
# print(type(docs[0].page_content))

[METRICS] load_documents: time=0.94s, count=15


### Chunking

In [8]:
from src.ingestion.DocumentChunker import DocumentChunker

In [9]:
# Chunking configuration gathered in one place so the values are easy to tune.
# The embedding model id is the same string the embedder cell uses.
chunker_settings = {
    "hf_embedding_model": "sentence-transformers/all-mpnet-base-v2",
    "chunk_size": 300,
    "chunk_overlap": 80,
}
chunker = DocumentChunker(**chunker_settings)

In [10]:
chunks = chunker.chunk_documents(docs)
token_count = chunker.get_docs_token_count(chunks)

[METRICS] chunk_documents: time=0.05s, count=36
[METRICS] get_docs_token_count: time=0.02s, count=36


In [11]:
print(len(chunks))
print(token_count)

36
11497


### Embedding

In [12]:
from src.ingestion.HuggingFaceEmbedder import HuggingFaceEmbedder

In [13]:
embedder = HuggingFaceEmbedder("sentence-transformers/all-mpnet-base-v2")

In [14]:
v1  = embedder.embed_query(chunks[0].page_content)
print("dimension",len(v1))

[METRICS] embed_query: time=0.89s, count=386
dimension 768


### Vector Store Management

In [15]:
from src.ingestion.VectorStoreManager import VectorStoreManager

In [16]:
vsm = VectorStoreManager(embedding_function=embedder,index_name=folder_name)

INFO:src.ingestion.VectorStoreManager:VectorStoreManager initialized for index 'index'


In [17]:
vsm.create_index()

INFO:src.ingestion.VectorStoreManager:Created FAISS index 'index' with dim=768


[METRICS] embed_query: time=0.07s, count=4


In [18]:
vsm.add_documents(chunks)

INFO:src.ingestion.HuggingFaceEmbedder:Embedding 36 documents with model 'sentence-transformers/all-mpnet-base-v2'
INFO:src.ingestion.HuggingFaceEmbedder:Successfully embedded 36/36 documents
INFO:src.ingestion.VectorStoreManager:Added 36 documents to 'index'


[METRICS] embed_documents: time=15.94s, count=11497
[METRICS] add_documents: time=15.97s, count=36


['f3932688-275c-48ca-902c-b186fbd99e26',
 '74e2e924-4ce4-4384-958d-3a7e341ccf81',
 'a29630f8-64a8-4131-9309-fcbdefc6129f',
 'b6e1cbc1-f624-4df4-9f4f-997485846718',
 'c6885dcd-917b-4edd-8e2d-0152cc0f2419',
 '88f17933-f4d6-4c34-9336-2ccc365ac3f9',
 '2c647bd8-8ade-443c-9bf6-b645aa98f9ba',
 '12c0afc5-4150-4e42-b316-da4359d717a2',
 'cbb595be-e718-46e5-ae45-947edaf741c2',
 '04304937-f9a7-4b6d-a0e2-de8d387dc5a1',
 '94b6971d-3937-4a0b-b547-f1733314b1bc',
 '705c0175-1c75-4428-90e9-055eee2ddb1a',
 '93ae4c99-c91e-4479-abd7-d5fa12f63979',
 'c3180194-44d4-47ff-8849-110f71fb243a',
 'd9f864e2-00c1-490c-ad0c-aded3c5eefb0',
 '555036f1-a45c-4ece-a4fb-b4eccd389784',
 'ff9f741f-241c-4148-ad32-829c88ae09a6',
 '760b265c-3656-4730-a69c-6bbe4b51f595',
 '660ac5b9-9d97-41d8-a232-05d1762509a3',
 '19835973-7388-48f3-b53d-1b20a0d7108d',
 '46278085-4ac2-4a6f-b019-a512c0885f03',
 '15ee65b4-61cb-44ec-a017-cd06db2cf91b',
 '54bdd885-e918-4933-a037-537ad7cc46ad',
 '3c5fc77a-ea4e-4cc6-81fb-f9cc1bce8458',
 'aa0fdb82-b583-

In [None]:
# vsm.save_local()

In [None]:
# vsm.load_local(allow_pickle=True)

### Retrieval

In [None]:
# Scored lookup kept under its own name: a later retrieval cell rebinds
# `retrieved` to the retriever's output, which would otherwise discard these
# results before they can be inspected.
# NOTE(review): presumably returns (document, score) pairs — confirm against VectorStoreManager.
retrieved_with_scores = vsm.similarity_search_with_score(query="experience at dolf", k=2)

In [None]:
retriever = vsm.retriever(search_type = "similarity", search_kwargs = {"k":2})

In [None]:
retrieved = retriever.invoke("projects by snakalp")

In [None]:
texts = [ret.page_content for ret in retrieved]
texts

### Supported LLMs (as of 06/05/2025)

In [None]:
# Hugging Face model ids listed under this section's "Supported LLMs" heading.
hf_llms: List[str] = [
    # Meta Llama instruct models
    "meta-llama/Llama-3.1-8B-Instruct",
    "meta-llama/Llama-3.3-70B-Instruct",
    # DeepSeek distilled model
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
    # Mistral instruct model
    "mistralai/Mistral-7B-Instruct-v0.3",
    # NVIDIA Nemotron instruct model
    "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
]

In [None]:
from huggingface_hub import InferenceClient
from config.settings import settings

# Module-level Hugging Face inference client; get_answer() below sends its
# chat-completion requests through it.
# The API token is read from the project settings object via
# get_secret_value() instead of being written into the notebook.
client = InferenceClient(
    provider="hf-inference",
    api_key=settings.HF_TOKEN.get_secret_value(),
)

def get_answer(
    sys_prompt: str,
    query: str,
    model: str = "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"
) -> str:
    """
    Run a single-turn chat completion through the module-level ``client``.

    Args:
        sys_prompt: Text sent as the ``system`` message.
        query: Text sent as the ``user`` message.
        model: Model id passed to the completion call.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    # Two-message conversation: system instruction first, then the user query.
    conversation = [
        {"role": "system", "content": sys_prompt},
        {"role": "user", "content": query},
    ]
    completion = client.chat.completions.create(model=model, messages=conversation)
    # Only the first returned choice is used.
    first_choice = completion.choices[0]
    return first_choice.message.content


In [None]:
response = get_answer(sys_prompt="you are a helpful assistant who answers the users query concisely", query="what are the top houses in game of thrones")

In [None]:
# Display the answer already captured in `response` by the previous cell
# instead of issuing a second, identical completion request.
response

### Generation Pipeline

In [None]:
from src.generation.HuggingFaceLLM import HuggingFaceLLM

In [None]:
pg_llm = HuggingFaceLLM(model_name="meta-llama/Llama-3.1-8B-Instruct")

In [None]:
# pg_llm.get_answer(sys_prompt="you are a helpful assistant that answers concisely", user_prompt="what is quantum computing ?", max_tokens = 200)

In [None]:
from src.generation.PromptAugmentor import PromptAugmentor

In [None]:
augmentor = PromptAugmentor(client=pg_llm)

In [None]:
prompts = augmentor.generate(query="what is a graph db and how is it different from a regular VectorDB ?", synthetic_count=4)
prompts

In [None]:
from tqdm.auto import tqdm

# Pair every generated prompt with the chunks the retriever returns for it.
# The hits get their own name on purpose: `docs` holds the loaded source
# documents that feed the chunking cell, and rebinding it here would make a
# re-run of that cell chunk retrieval results instead of the loaded documents.
prompt_chunks = []
for prompt in tqdm(prompts, desc="Retrieving chunks", unit="prompts"):
    retrieved_docs = retriever.invoke(prompt)
    prompt_chunks.append((prompt, retrieved_docs))


In [None]:
from src.generation.Fusion import FusionSummarizer
from src.generation.Prompts import Prompts

In [None]:
fusion_summarizer = FusionSummarizer(fusion_llm=pg_llm,sys_prompt=Prompts.MERGE_FUSION_SYS_PROMPT)

In [None]:
summaries = fusion_summarizer.summarize(prompt_chunks=prompt_chunks)

In [None]:
all_summaries = "\n\n".join(summaries)

In [None]:
final_llm = HuggingFaceLLM(model_name="meta-llama/Llama-3.3-70B-Instruct")

In [None]:
# User message for the final model: the question followed by the fused summaries.
final_user_prompt = "User Question: \nwhat is a graph db and how is it different from a regular VectorDB ? \n\n Context: \n" + all_summaries

final_answer = final_llm.get_answer(
    sys_prompt=Prompts.FINAL_ANS_SYS_PROMPT,
    user_prompt=final_user_prompt,
    max_tokens=400,
    temperature=0.7,
)

In [None]:
print(final_answer)