### Imports

In [2]:
from typing import List, Tuple, Dict

In [1]:
from config.settings import settings

### Document Loading

In [3]:
from src.ingestion.DocumentLoader import DocumentLoader

In [4]:
loader = DocumentLoader()

In [5]:
folder_name = "index"

In [6]:
files = loader.list_filenames(folder_name)
files

[METRICS] list_filenames: time=0.00s, count=1


['Graph_Databases_for_Beginners.pdf']

In [7]:
docs = loader.load_documents(subdir=folder_name,file_names=files)
# print(type(docs[0].page_content))

Files:   0%|          | 0/1 [00:00<?, ?file/s]

[METRICS] load_documents: time=2.80s, count=46


### Chunking

In [8]:
from src.ingestion.DocumentChunker import DocumentChunker

In [9]:
chunker = DocumentChunker(
    hf_embedding_model="sentence-transformers/all-mpnet-base-v2",
    chunk_size=300,
    chunk_overlap=80
)

In [10]:
chunks = chunker.chunk_documents(docs)
token_count = chunker.get_docs_token_count(chunks)

Chunking documents:   0%|          | 0/46 [00:00<?, ?doc/s]

[METRICS] chunk_documents: time=0.47s, count=88
[METRICS] get_docs_token_count: time=0.18s, count=88


In [11]:
print(len(chunks))
print(token_count)

88
26370


### Embedding

In [12]:
from src.ingestion.HuggingFaceEmbedder import HuggingFaceEmbedder

In [13]:
embedder = HuggingFaceEmbedder("sentence-transformers/all-mpnet-base-v2")

In [15]:
embedder.embed_documents([chunk_txt.page_content for chunk_txt in chunks])

Embedding documents:   0%|          | 0/88 [00:00<?, ?doc/s]

[METRICS] embed_documents: time=59.13s, count=26370


[[-0.017373530194163322,
  0.03784393519163132,
  -0.036408040672540665,
  -0.0102456696331501,
  -0.0007378659793175757,
  -0.019672829657793045,
  -0.00465614628046751,
  -0.015719246119260788,
  0.03771594911813736,
  0.02384931966662407,
  0.02266942523419857,
  0.0036350013688206673,
  0.021248994395136833,
  0.11568453907966614,
  0.004228704143315554,
  -0.031792156398296356,
  0.04510628059506416,
  0.039778128266334534,
  -0.03197060525417328,
  0.003414876526221633,
  -0.021681873127818108,
  0.05809233337640762,
  -0.020326487720012665,
  -0.02122456766664982,
  -0.041473377496004105,
  -0.020732026547193527,
  -0.03941815719008446,
  0.047502100467681885,
  -0.02649296633899212,
  -0.03711891546845436,
  0.023602047935128212,
  0.038821347057819366,
  -0.0198829285800457,
  0.04074656590819359,
  1.973927510334761e-06,
  -0.026222554966807365,
  -0.06969652324914932,
  -0.013813813216984272,
  0.006749598775058985,
  -0.03631288930773735,
  0.029801374301314354,
  -0.009536

In [17]:
v1  = embedder.embed_query(chunks[0].page_content)
print("dimension",len(v1))

[METRICS] embed_query: time=0.37s, count=32
dimension 768


### Vector Store Management

In [None]:
from src.ingestion.VectorStoreManager import VectorStoreManager

In [19]:
vsm = VectorStoreManager(embedding_function=embedder,index_name=folder_name)

In [22]:
vsm.create_index()

INFO:src.ingestion.VectorStoreManager:Created in‐memory FAISS index 'index' (dim=768)


[METRICS] embed_query: time=0.16s, count=4


In [23]:
vsm.add_documents(chunks)

INFO:src.ingestion.VectorStoreManager:Added 88 docs into 'index'


[METRICS] embed_documents: time=61.24s, count=26370
[METRICS] add_documents: time=61.42s, count=88


['3b937cfb-d524-4eb6-8ea7-b83753f6fa8f',
 '99a1dc26-69b9-4e0d-b56b-add9c7372cd7',
 'bb29b478-08d2-4f93-9057-3888b3957d9d',
 '27f960f4-a0ca-4a45-85ea-8669c2a39fc5',
 '9e88b23f-10d7-4971-9f4d-aceb7e6ad78d',
 '86d414aa-9b7f-4d53-8f4c-e1edf1446efe',
 'f7cf26dd-039a-4b5a-9abb-dd4d6b5e492a',
 'bb2ab7a1-c45e-49f7-8431-f709e4529c7d',
 'dfaa63cd-8942-49b4-b55b-e45e91ffdce5',
 '719d87d3-9108-4d2a-a2b5-deeb65c1ff84',
 '0d48c7cd-ee07-45b5-998f-44c0e12dd802',
 'fa99c4df-c712-452a-9c38-450e0afba6ba',
 'd1b3d024-0383-45e5-8efc-360ad5b99099',
 'ca051af7-636e-4f55-9979-a8d1a91227fe',
 '0e602ffe-48df-4d5e-9f35-5bd27ca116ac',
 'd97bdbdf-b8cc-4b42-a60f-11a2fb4b6b32',
 'e2b58851-c68b-4761-b4f4-c39b6549cb09',
 'f8458a07-0fc2-4033-ae89-97f2e92cbb30',
 '835ff2ce-9257-40be-bb8e-68ee349392d1',
 '7675e7b7-7a55-446d-bc1a-2e4d7d9f0301',
 '4ea423b8-8a5d-4815-878d-b554c29c3395',
 '95194159-0b88-4506-a21b-045d1b6bd34a',
 '2186d6dc-cb09-4ecc-860e-b9164cff825f',
 '16f3f66e-a234-4447-b3b9-89fef74d15c7',
 '2f9c3683-7950-

In [24]:
# vsm.save_local()

In [25]:
# vsm.load_local(allow_pickle=True)

### Retrieval

In [21]:
retrieved = vsm.similarity_search_with_score(query="experience at dolf", k=2)

[METRICS] embed_query: time=0.08s, count=6
[METRICS] similarity_search_with_score: time=0.09s, count=2


In [26]:
retriever = vsm.retriever(search_type = "similarity", search_kwargs = {"k":2})

INFO:src.ingestion.VectorStoreManager:Created retriever for 'index' with {'search_type': 'similarity', 'search_kwargs': {'k': 2}}


In [23]:
retrieved = retriever.invoke("projects by snakalp")

[METRICS] embed_query: time=0.17s, count=8


In [24]:
texts = [ret.page_content for ret in retrieved]
texts

['- time recommendations, graph - based search or supply - chain management, be sure to review all the different ways in which graph technology can work for your company. and while our customers span several continents and professional fields, they all agree that using the neo4j graph database is a critical component of their business success and competitiveness. are you a developer eager to learn more about making the switch? with so many ways to quickly get started, mastering graph database development is one of the best time investments you can make. other resources videos : • intro to neo4j and graph databases • intro to graph databases episode # 1 - evolution of dbs books : • o ’ reilly book : graph databases • learning neo4j trainings : • online training : getting started with neo4j • classroom trainings whether you need a solution that provides real - time recommendations, graph - based search or supply - chain management, be sure to review all the different ways in which graph 

### Supported LLMs (as of 06/05/2025)

In [None]:
hf_llms=[
    "meta-llama/Llama-3.1-8B-Instruct",
    "meta-llama/Llama-3.3-70B-Instruct",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"
]

In [56]:
from huggingface_hub import InferenceClient
from config.settings import settings

client = InferenceClient(
    provider="hf-inference",
    api_key=settings.HF_TOKEN.get_secret_value(),
)

def get_answer(
    sys_prompt: str,
    query: str,
    model: str = "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"
) -> str:
    """
    Send a system + user prompt to the specified model via HF Inference,
    returning the assistant’s content string.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": sys_prompt},
            {"role": "user",   "content": query}
        ]
    )
    return response.choices[0].message.content


In [57]:
get_answer(sys_prompt="you are a helpful assistant who answers the users query concisely", query="what are the top houses in game of thrones")

'A concise question about Westeros! Here are the **Top Houses in Game of Thrones**, in no particular order, highlighting their **Sigil**, **Motto**, and **Notable Members**:\n\n1. **House Stark**\n\t* **Sigil**: Direwolf\n\t* **Motto**: "Winter is Coming"\n\t* **Notable Members**: Eddard (Ned), Robb, Sansa, Arya, Bran, Jon Snow\n\n2. **House Lannister**\n\t* **Sigil**: Lion\n\t* **Motto**: "Hear Me Roar!"\n\t* **Notable Members**: Cersei, Jaime, Tyrion, Tywin, Kevan\n\n3. **House Targaryen**\n\t* **Sigil**: Dragon\n\t* **Motto**: "Fire and Blood"\n\t* **Notable Members**: Daenerys, Viserys, Rhaegar, Aerys II (Mad King), Jon Snow (Aegon Targaryen)\n\n4. **House Baratheon**\n\t* **Sigil**: Stag\n\t* **Motto**: "Ours is the Fury"\n\t* **Notable Members**: Robert, Stannis, Renly, Joffrey, Myrcella, Tommen\n\n5. **House Tyrell**\n\t* **Sigil**: Rose\n\t* **Motto**: "Growing Strong"\n\t* **Notable Members**: Mace, Loras, Margaery, Olenna (Queen of Thorns)\n\n6. **House Greyjoy**\n\t* **Sigil

### Generation Pipeline

In [27]:
from src.generation.HuggingFaceLLM import HuggingFaceLLM

In [28]:
pg_llm = HuggingFaceLLM(model_name="meta-llama/Llama-3.1-8B-Instruct")

In [29]:
# pg_llm.get_answer(sys_prompt="you are a helpful assistant that answers concisely", user_prompt="what is quantum computing ?", max_tokens = 200)

In [30]:
from src.generation.PromptAugmentor import PromptAugmentor

In [31]:
augmentor = PromptAugmentor(client=pg_llm)

In [32]:
prompts = augmentor.generate(query="what is a graph db and how is it different from a regular VectorDB ?", synthetic_count=2)
prompts

INFO:src.generation.PromptAugmentor:Requesting synthetic prompt 1/2
INFO:src.generation.HuggingFaceLLM:get_answer called
INFO:src.generation.HuggingFaceLLM:API call successful: model=meta-llama/Llama-3.1-8B-Instruct messages=2
INFO:src.generation.HuggingFaceLLM:get_answer returning 136 characters
INFO:src.generation.PromptAugmentor:Generated prompt #1: 'What is the key difference between a Graph Database and a traditional Vector Database in terms of data structure and query capabilities?'
INFO:src.generation.PromptAugmentor:Requesting synthetic prompt 2/2
INFO:src.generation.HuggingFaceLLM:get_answer called
INFO:src.generation.HuggingFaceLLM:API call successful: model=meta-llama/Llama-3.1-8B-Instruct messages=2
INFO:src.generation.HuggingFaceLLM:get_answer returning 102 characters
INFO:src.generation.PromptAugmentor:Generated prompt #2: 'What are the primary use cases for Graph Databases versus Vector Databases in real-world applications?'


['What is the key difference between a Graph Database and a traditional Vector Database in terms of data structure and query capabilities?',
 'What are the primary use cases for Graph Databases versus Vector Databases in real-world applications?',
 'what is a graph db and how is it different from a regular VectorDB ?']

In [33]:
retriever = vsm.retriever(search_type="similarity", search_kwargs={"k": 4})
prompt_chunks = [(p, retriever.invoke(p)) for p in prompts]

INFO:src.ingestion.VectorStoreManager:Created retriever for 'index' with {'search_type': 'similarity', 'search_kwargs': {'k': 4}}


[METRICS] embed_query: time=0.14s, count=25
[METRICS] embed_query: time=0.23s, count=20
[METRICS] embed_query: time=0.28s, count=18


In [46]:
from src.generation.Fusion import FusionSummarizer
from src.generation.Prompts import Prompts

In [47]:
fusion_summarizer = FusionSummarizer(fusion_llm=pg_llm,sys_prompt=Prompts.MERGE_FUSION_SYS_PROMPT)

In [48]:
summaries = fusion_summarizer.summarize(prompt_chunks=prompt_chunks)

INFO:src.generation.Fusion:Generating summary for prompt 1: 'What is the key difference between a Graph Databas...'
INFO:src.generation.HuggingFaceLLM:get_answer called
INFO:src.generation.HuggingFaceLLM:API call successful: model=meta-llama/Llama-3.1-8B-Instruct messages=2
INFO:src.generation.HuggingFaceLLM:get_answer returning 1111 characters
INFO:src.generation.Fusion:Generating summary for prompt 2: 'What are the primary use cases for Graph Databases...'
INFO:src.generation.HuggingFaceLLM:get_answer called
INFO:src.generation.HuggingFaceLLM:API call successful: model=meta-llama/Llama-3.1-8B-Instruct messages=2
INFO:src.generation.HuggingFaceLLM:get_answer returning 1111 characters
INFO:src.generation.Fusion:Generating summary for prompt 3: 'what is a graph db and how is it different from a ...'
INFO:src.generation.HuggingFaceLLM:get_answer called
INFO:src.generation.HuggingFaceLLM:API call successful: model=meta-llama/Llama-3.1-8B-Instruct messages=2
INFO:src.generation.HuggingFace

In [41]:
all_summaries = "\n\n".join(summaries)

In [42]:
final_llm = HuggingFaceLLM(model_name="meta-llama/Llama-3.3-70B-Instruct")

INFO:src.generation.HuggingFaceLLM:Creating new HuggingFaceLLM for model: meta-llama/Llama-3.3-70B-Instruct
INFO:src.generation.HuggingFaceLLM:InferenceClient initialized for model: meta-llama/Llama-3.3-70B-Instruct


In [43]:
final_answer = final_llm.get_answer(sys_prompt=Prompts.FINAL_ANS_SYS_PROMPT,user_prompt="User Question: \nwhat is a graph db and how is it different from a regular VectorDB ? \n\n Context: \n"+all_summaries,max_tokens = 400, temperature = 0.7)

INFO:src.generation.HuggingFaceLLM:get_answer called
INFO:src.generation.HuggingFaceLLM:API call successful: model=meta-llama/Llama-3.3-70B-Instruct messages=2
INFO:src.generation.HuggingFaceLLM:get_answer returning 599 characters


In [45]:
print(final_answer)

A graph database is an online, operational database management system that operates on a graph data model, storing data as nodes and relationships. It is different from a regular VectorDB in terms of data model, storage, and processing. Graph databases use a graph data model, native graph storage, and native graph processing, whereas VectorDBs are suited for dense, high-dimensional data and are often used in scenarios such as recommendation systems, image and speech recognition, and Natural Language Processing (NLP). Insufficient information is available to provide a more detailed comparison.
