### Imports

In [1]:
from src.ingestion.loader import DocumentLoader
from src.ingestion.chunker import DocumentChunker
from src.ingestion.HuggingFaceEmbedder import HuggingFaceEmbedder
from config.settings import settings
from src.ingestion.VectorStoreManager import VectorStoreManager

### Document Loading

In [2]:
loader = DocumentLoader()

In [3]:
files = loader.list_filenames("pdfs")
files

[METRICS] list_filenames: time=0.00s, count=3


['Graph_Databases_for_Beginners.pdf',
 'Project_4_Sankalp_Mane.pdf',
 'requirements.txt']

In [4]:
docs = loader.load_documents("pdfs",file_names=files)
# print(type(docs[0].page_content))

[METRICS] load_documents: time=1.33s, count=70


### Chunking

In [5]:
chunker = DocumentChunker(
    hf_embedding_model="sentence-transformers/all-mpnet-base-v2",
    chunk_size=300,
    chunk_overlap=80
)

INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2


In [6]:
chunks = chunker.chunk_documents(docs)
token_count = chunker.get_docs_token_count(chunks)

[METRICS] chunk_documents: time=0.12s, count=122
[METRICS] get_docs_token_count: time=0.05s, count=122


In [7]:
print(len(chunks))
print(token_count)

122
33989


### Embedding

In [8]:
embedder = HuggingFaceEmbedder("sentence-transformers/all-mpnet-base-v2")

INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2


In [9]:
v1  = embedder.embed_query(chunks[0].page_content)
print("dimension",len(v1))

[METRICS] embed_query: time=0.05s, count=32
dimension 768


### Vector Store Management

In [10]:
vs = VectorStoreManager(embedding_function=embedder,index_name="pdfs")

In [11]:
vs.create_index()

INFO:src.ingestion.VectorStoreManager:Created in‐memory FAISS index 'pdfs' (dim=768)


[METRICS] embed_query: time=0.10s, count=4


In [12]:
vs.add_documents(chunks)

INFO:src.ingestion.VectorStoreManager:Added 122 docs into 'pdfs'


[METRICS] embed_documents: time=45.21s, count=33989
[METRICS] add_documents: time=45.27s, count=122


['7211ebd7-9c3b-4bf4-9ca4-447db1af927e',
 '047b2ba5-1739-47eb-a3eb-ef54c08f4a65',
 'b595bb2d-1dfc-4410-af7b-9494c4b2b027',
 '07f8620a-65de-47dc-a0fb-1f416f22e4c7',
 '93aa3838-4e9b-4084-85a9-6985e02d5b18',
 '89b0371c-637c-4f8b-9a7d-80183695d13e',
 'e38f711e-1dc7-43ae-b068-289717d5f116',
 '9c44a0ab-6e2a-4618-9a8a-49183c9d51d5',
 '7951c775-d05b-492e-b7d9-6fe2a13469e5',
 'c04cd936-466c-4c8d-988c-fcb8bfc286f7',
 '62bd2e04-2a0d-48cd-89a2-577e87e96d18',
 'd433fc4f-aa36-4981-a85f-7a5bd6e3dd1a',
 '49955d8e-007f-405a-a90c-2283e4cb2162',
 '959628c0-0cc9-46a6-a848-6309323908d5',
 '84dec566-5f0f-48fc-a030-3568b26d3ea8',
 '8ed45d79-f502-4bbc-b858-8dac9439f69a',
 'd5130b79-c44b-45e9-a6e8-ed7e18f95921',
 'bbcb7b70-86bf-4b88-ac97-4b2e1184391f',
 '8efae152-e7dc-41b4-8c9b-ef69b730509e',
 '492fd4fd-dc81-47a0-a41f-cab3045f233b',
 '27d66687-5bfa-4066-ad81-247052f59954',
 'dbee5495-6025-4134-aebc-44f39ac314ef',
 'fdff51cb-47ca-4b4e-8dfa-a2cc11f1701d',
 '3b51237a-877c-4418-bd9c-005d8f230a6f',
 '4cb26c9a-c933-

In [13]:
vs.save_local()

INFO:src.ingestion.VectorStoreManager:Saved index 'pdfs' to /home/ashmit/work/SEM_VIII/EnhancedRAG/context/faiss_indexes


In [14]:
retriever = vs.retriever(search_type = "similarity", search_kwargs = {"k":10})

INFO:src.ingestion.VectorStoreManager:Created retriever for 'pdfs' with {'search_type': 'similarity', 'search_kwargs': {'k': 10}}


In [16]:
retriever.invoke("projects by snakalp")

[METRICS] embed_query: time=0.05s, count=8


[Document(id='2ec59ff3-c8db-4fd4-a857-5546a86d9931', metadata={'producer': 'Skia/PDF m136 Google Docs Renderer', 'creator': 'PyPDF', 'creationdate': '', 'title': 'Project_4_Sankalp_Mane.docx', 'source': '/home/ashmit/work/SEM_VIII/EnhancedRAG/context/pdfs/Project_4_Sankalp_Mane.pdf', 'total_pages': 23, 'page': 3, 'page_label': '4', 'chunk_index': 0, 'tokenizer': 'sentence-transformers/all-mpnet-base-v2', 'tokens': 184}, page_content='acknowledgement i am highly grateful to dr. soharab hossain, associate professor and dr. sandeep kumar, assistant professor at bml munjal university, gurugram, for providing supervision to carry out the seminar / case study from aug - december 2024. dr. soharab and dr. sandeep have provided great help in carrying out my work and is acknowledged with reverential thanks. without wise counsel and able guidance, it would have been impossible to complete the training in this manner. i would like to express thanks profusely to thank dr. soharab and dr. sandeep f

In [18]:
from huggingface_hub import InferenceClient

In [2]:
from src.utils.ModelLister import HuggingFaceModelLister

In [4]:
lister = HuggingFaceModelLister()

models = lister.list_models(task="text-generation",filter="text-generation-inference",inference="warm")

[METRICS] list_models: time=0.32s, count=10


In [24]:
models

['bigscience/bloom',
 'microsoft/phi-2',
 'Qwen/QwQ-32B',
 'openai-community/gpt2',
 'tiiuae/falcon-40b',
 'nvidia/Llama-3.1-Nemotron-70B-Instruct-HF',
 'microsoft/phi-4',
 'databricks/dolly-v2-12b',
 'Qwen/Qwen2.5-Coder-32B-Instruct',
 'Qwen/QwQ-32B-Preview']

In [None]:
client = InferenceClient(
    provider="hf-inference",
    api_key=settings.HF_TOKEN.get_secret_value(),
)

completion = client.chat.completions.create(
    model="nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
    messages=[
        {
            "role": "user",
            "content": "What is the capital of France?"
        }
    ],
    max_tokens=500,
)



HfHubHTTPError: (Request ID: Root=1-680dc39d-354fddbb602f215f3923108f;edafa828-830d-4213-b8db-ec8700e37721)

403 Forbidden: None.
Cannot access content at: https://router.huggingface.co/hf-inference/models/microsoft/phi-4/v1/chat/completions.
Make sure your token has the correct permissions.
The model microsoft/phi-4 is too large to be loaded automatically (29GB > 10GB).

In [28]:
completion

ChatCompletionOutput(choices=[ChatCompletionOutputComplete(finish_reason='stop', index=0, message=ChatCompletionOutputMessage(role='assistant', content='The capital of France is **Paris**.', tool_call_id=None, tool_calls=None), logprobs=None)], created=1745731031, id='', model='nvidia/Llama-3.1-Nemotron-70B-Instruct-HF', system_fingerprint='3.2.1-sha-4d28897', usage=ChatCompletionOutputUsage(completion_tokens=10, prompt_tokens=22, total_tokens=32), object='chat.completion')

In [32]:
completion.choices[0].message.content

'The capital of France is **Paris**.'

In [37]:
completion.usage.total_tokens

32