In [7]:
import os
import glob
from llama_index.core import SimpleDirectoryReader  # LlamaIndex's file loader

# Directory holding the HR source files (resolved against the working directory).
folder_path = "../hrdata/"

# Build a reader limited to plain-text and PDF files, then read every match
# into LlamaIndex Document objects.
hr_file_reader = SimpleDirectoryReader(
    folder_path,
    required_exts=[".txt", ".pdf"],
)
text_documents = hr_file_reader.load_data()

# Preview the first two loaded documents.
text_documents[:2]

 Document(id_='1bd24ac8-4424-4c98-833b-1bde8da0f10b', embedding=None, metadata={'file_path': 'f:\\redsoft-projects\\hr-chatbot\\rag\\..\\hrdata\\faqs.txt', 'file_name': 'faqs.txt', 'file_type': 'text/plain', 'file_size': 3329, 'creation_date': '2025-04-08', 'last_modified_date': '2025-04-08'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='🔹 General Questions\r\nQ1. How do I check my leave balance?\r\nA: You can check your leave balance by logging into the HRMS portal and navigating to: Dashboard > Leave Summary.\r\n\r\nQ2. Whom should I contact for any HR-related issues?\r\nA: You can raise a query in the HRMS Helpdesk or email us at hr.

In [8]:
import os
from dotenv import load_dotenv
# Copy the variables from a local .env file into the process environment.
load_dotenv()

# os.environ only accepts string values, so assigning the result of
# os.getenv() directly raises an opaque TypeError when the key is not set.
# Validate first and fail with an actionable message instead.
openai_api_key = os.getenv('OPENAI_API_KEY')
if not openai_api_key:
    raise ValueError("OPENAI_API_KEY is missing. Check your .env file.")
os.environ['OPENAI_API_KEY'] = openai_api_key

# Qdrant connection settings; these are read here and validated where the
# client is created.
QDRANT_URL = os.getenv('QDRANT_URL')
QDRANT_API_KEY = os.getenv('QDRANT_API_KEY')

In [9]:
from llama_index.core.node_parser import SentenceWindowNodeParser  # LlamaIndex text splitter

# Configure the sentence-window node parser. Each produced node carries two
# extra metadata entries, stored under the keys named below ("window" and
# "original_text"); window_size controls how much neighbouring text the
# "window" entry captures (see the LlamaIndex SentenceWindowNodeParser docs).
sentence_window_options = dict(
    window_size=3,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)
text_splitter = SentenceWindowNodeParser.from_defaults(**sentence_window_options)

# Turn the loaded documents into nodes.
split_documents = text_splitter.get_nodes_from_documents(text_documents)

# Preview the first five nodes.
split_documents[:5]


[TextNode(id_='a435a21f-df16-4371-a592-91fb98eb9150', embedding=None, metadata={'file_path': 'f:\\redsoft-projects\\hr-chatbot\\rag\\..\\hrdata\\code_of_conduct.txt', 'file_name': 'code_of_conduct.txt', 'file_type': 'text/plain', 'file_size': 3440, 'creation_date': '2025-04-08', 'last_modified_date': '2025-04-08', 'window': '📄 Company Code of Conduct\r\nDocument Title: Code of Conduct\r\nApplies To: All Employees, Interns, Contractors, and Consultants\r\nEffective Date: January 1, 2025\r\nLast Reviewed: March 15, 2025\r\n\r\n🟦 1.  Professionalism and Workplace Behavior\r\nEmployees are expected to maintain professionalism in behavior, language, and attire at all times.\r\n\r\n Treat colleagues, clients, and vendors with respect, courtesy, and fairness.\r\n\r\n Bullying, harassment (including sexual harassment), and discrimination of any kind are strictly prohibited.\r\n\r\n', 'original_text': '📄 Company Code of Conduct\r\nDocument Title: Code of Conduct\r\nApplies To: All Employees, In

In [10]:

import os
from dotenv import load_dotenv
from qdrant_client import QdrantClient
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import StorageContext, VectorStoreIndex, load_index_from_storage
from llama_index.embeddings.openai import OpenAIEmbedding

# Read the Qdrant connection settings from the environment (.env is loaded first).
load_dotenv()
QDRANT_URL = os.getenv("QDRANT_URL")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")  # not validated: may legitimately be unset

if not QDRANT_URL:
    raise ValueError("QDRANT_URL is missing. Check your .env file.")

qdrant_client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)

COLLECTION_NAME = "hr_docs"

# Wire the Qdrant collection into LlamaIndex as the vector store backend.
vector_store = QdrantVectorStore(client=qdrant_client, collection_name=COLLECTION_NAME)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
embedding_model = OpenAIEmbedding()

# Decide between "reuse" and "build" by looking the collection up by name.
collections = qdrant_client.get_collections()
collection_names = [col.name for col in collections.collections]

if COLLECTION_NAME in collection_names:
    # The collection already holds embedded nodes: attach to it without
    # re-embedding anything. Its contents are used as-is, so drop the
    # collection in Qdrant to force a re-index after changing the parser.
    print(f"Collection '{COLLECTION_NAME}' found in Qdrant. Loading existing data...")
    index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embedding_model)
else:
    # Index the nodes produced by the sentence-window parser. The previous
    # `from_documents(text_documents, ...)` call re-chunked the raw documents
    # with LlamaIndex's default parser, so `split_documents` (and its
    # "window" metadata) never reached the vector store.
    print(f"Collection '{COLLECTION_NAME}' not found. Creating new index...")
    index = VectorStoreIndex(
        nodes=split_documents,
        storage_context=storage_context,
        embed_model=embedding_model,
    )
    # NOTE(review): this writes LlamaIndex's local storage files in addition
    # to the vectors already sent to Qdrant -- confirm the local copy is
    # still needed, since nothing in this notebook reads it back.
    index.storage_context.persist()

print("Qdrant is ready for querying!")


Collection 'hr_docs' not found. Creating new index...
Qdrant is ready for querying!


In [11]:
# Assuming `index` is already initialized from previous code

def query_qdrant(query: str):
    """Answer a question using the Qdrant-backed ``index``.

    Args:
        query: Natural-language question to run against the index.

    Returns:
        The response object produced by the query engine; printing it shows
        the answer text.

    Notes:
        Relies on the notebook-level ``index`` variable, so the indexing cell
        must have been run first. Retrieved nodes that carry a "window"
        metadata entry (as written by the sentence-window parser) have their
        text replaced by that entry before answer synthesis; nodes without it
        are passed through unchanged.
    """
    # Imported locally so this cell does not depend on another cell's imports.
    from llama_index.core.postprocessor import MetadataReplacementPostProcessor

    query_engine = index.as_query_engine(
        node_postprocessors=[
            MetadataReplacementPostProcessor(target_metadata_key="window")
        ],
    )
    return query_engine.query(query)

# Smoke test: ask a single HR policy question and show the synthesized answer.
query = "What is notice period during probation?"
result = query_qdrant(query)
print(str(result))


During probation, the notice period is 15 days or payment in lieu of notice.
