### Imports

In [2]:
from typing import List, Tuple, Dict

In [3]:
from config.settings import settings

### Document Loading

In [4]:
from src.ingestion.DocumentLoader import DocumentLoader

In [5]:
loader = DocumentLoader()

In [6]:
folder_name = "index"

In [7]:
files = loader.list_filenames(folder_name)
files

[METRICS] list_filenames: time=0.00s, count=1


['Attention_is_all_you_need.pdf']

In [8]:
docs = loader.load_documents(subdir=folder_name,file_names=files)
# print(type(docs[0].page_content))

[METRICS] load_documents: time=0.81s, count=15


### Chunking

In [None]:
from src.ingestion.DocumentChunker import DocumentChunker

In [None]:
# Build the chunker used to split the loaded documents.
# NOTE(review): the model id below is the same string later passed to
# HuggingFaceEmbedder in the Embedding section; presumably the two must stay
# in sync -- confirm, and consider sharing a single constant.
# NOTE(review): the units of chunk_size / chunk_overlap (tokens vs. characters)
# are not visible from this notebook -- TODO confirm in DocumentChunker.
chunker = DocumentChunker(
    hf_embedding_model="sentence-transformers/all-mpnet-base-v2",
    chunk_size=300,
    chunk_overlap=80
)

In [None]:
chunks = chunker.chunk_documents(docs)
token_count = chunker.get_docs_token_count(chunks)

In [None]:
print(len(chunks))
print(token_count)

### Embedding

In [None]:
from src.ingestion.HuggingFaceEmbedder import HuggingFaceEmbedder

In [None]:
embedder = HuggingFaceEmbedder("sentence-transformers/all-mpnet-base-v2")

In [None]:
v1  = embedder.embed_query(chunks[0].page_content)
print("dimension",len(v1))

### Vector Store Management

In [None]:
from src.ingestion.VectorStoreManager import VectorStoreManager

In [None]:
vsm = VectorStoreManager(embedding_function=embedder,index_name=folder_name)

In [None]:
vsm.create_index()

In [None]:
vsm.add_documents(chunks)

In [None]:
# vsm.save_local()

In [None]:
# vsm.load_local(allow_pickle=True)

### Retrieval

In [None]:
# Direct top-2 similarity search against the vector store, with scores.
# NOTE(review): the query text looks unrelated to the only file listed for this
# index ('Attention_is_all_you_need.pdf') -- possibly left over from another
# corpus; confirm. The result is also rebound by the later retriever.invoke
# cell before any visible cell reads it.
retrieved = vsm.similarity_search_with_score(query="experience at dolf", k=2)

In [None]:
retriever = vsm.retriever(search_type = "similarity", search_kwargs = {"k":2})

In [None]:
# Run the query through the retriever; this rebinds `retrieved`, replacing the
# scored results from the earlier similarity_search_with_score cell.
# NOTE(review): the query text looks unrelated to the only file listed for this
# index ('Attention_is_all_you_need.pdf') -- confirm it is intentional.
retrieved = retriever.invoke("projects by snakalp")

In [None]:
texts = [ret.page_content for ret in retrieved]
texts

### Supported LLMs (as of 06/05/2025)

In [None]:
# Hugging Face model ids listed as supported for this notebook.
hf_llms = [
    "meta-llama/Llama-3.1-8B-Instruct",
    "meta-llama/Llama-3.3-70B-Instruct",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
]

In [None]:
from huggingface_hub import InferenceClient
from config.settings import settings

# Module-level inference client used by get_answer() below.
# The API key is read via settings.HF_TOKEN.get_secret_value(); presumably
# HF_TOKEN is a secret-wrapper field (e.g. pydantic SecretStr) -- confirm in
# config.settings.
client = InferenceClient(
    provider="hf-inference",
    api_key=settings.HF_TOKEN.get_secret_value(),
)

def get_answer(
    sys_prompt: str,
    query: str,
    model: str = "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"
) -> str:
    """Run one system + user chat turn against *model* and return the reply.

    Uses the module-level ``client`` to issue a chat-completions request.

    Args:
        sys_prompt: Text sent as the ``system`` message.
        query: Text sent as the ``user`` message.
        model: Model id forwarded to the chat-completions call.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    system_turn = {"role": "system", "content": sys_prompt}
    user_turn = {"role": "user", "content": query}

    completion = client.chat.completions.create(
        model=model,
        messages=[system_turn, user_turn],
    )

    # Only the first returned choice is used.
    first_choice = completion.choices[0]
    return first_choice.message.content


In [None]:
response = get_answer(sys_prompt="you are a helpful assistant who answers the users query concisely", query="what are the top houses in game of thrones")

In [None]:
# Display the model's answer as the cell output.
# NOTE(review): this repeats the previous cell's request with identical
# arguments, so a second inference call is issued; the earlier result is
# already bound to `response` and could be displayed instead.
get_answer(sys_prompt="you are a helpful assistant who answers the users query concisely", query="what are the top houses in game of thrones")

### Generation Pipeline

In [None]:
from src.generation.HuggingFaceLLM import HuggingFaceLLM

In [None]:
pg_llm = HuggingFaceLLM(model_name="meta-llama/Llama-3.1-8B-Instruct")

In [None]:
# pg_llm.get_answer(sys_prompt="you are a helpful assistant that answers concisely", user_prompt="what is quantum computing ?", max_tokens = 200)

In [None]:
from src.generation.PromptAugmentor import PromptAugmentor

In [None]:
augmentor = PromptAugmentor(client=pg_llm)

In [None]:
prompts = augmentor.generate(query="what is a graph db and how is it different from a regular VectorDB ?", synthetic_count=4)
prompts

In [None]:
from tqdm.auto import tqdm

# Retrieve supporting chunks for every generated prompt and pair each prompt
# with its results as (prompt, retrieved_documents) tuples.
#
# Built as a comprehension so that no loop variables leak into the notebook
# namespace: the previous for-loop rebound the global `docs`, clobbering the
# documents loaded in the "Document Loading" section, so re-running the
# chunking cell afterwards would have chunked retrieval results instead.
prompt_chunks = [
    (prompt, retriever.invoke(prompt))
    for prompt in tqdm(prompts, desc="Retrieving chunks", unit="prompts")
]


In [None]:
from src.generation.Fusion import FusionSummarizer
from src.generation.Prompts import Prompts

In [None]:
fusion_summarizer = FusionSummarizer(fusion_llm=pg_llm,sys_prompt=Prompts.MERGE_FUSION_SYS_PROMPT)

In [None]:
summaries = fusion_summarizer.summarize(prompt_chunks=prompt_chunks)

In [None]:
all_summaries = "\n\n".join(summaries)

In [None]:
final_llm = HuggingFaceLLM(model_name="meta-llama/Llama-3.3-70B-Instruct")

In [None]:
# Assemble the final user prompt: the user's question followed by the fused
# summaries as context.
# NOTE(review): the question text is hard-coded here and also in the
# PromptAugmentor.generate cell; the two copies must be kept in sync by hand.
final_user_prompt = (
    "User Question: \nwhat is a graph db and how is it different from a regular VectorDB ? \n\n Context: \n"
    + all_summaries
)

# Ask the final model for the answer, grounded in the assembled context.
final_answer = final_llm.get_answer(
    sys_prompt=Prompts.FINAL_ANS_SYS_PROMPT,
    user_prompt=final_user_prompt,
    max_tokens=400,
    temperature=0.7,
)

In [None]:
print(final_answer)