In [None]:
#CHROMA config

# client = chromadb.CloudClient(
#   api_key='ck-4B9mg6mzjdkBmh78pxKaUyCDci9DdfUUrAEbPkMXee2Z',
#   tenant='43fde602-a9c4-46e5-9947-6cad654de3ca',
#   database='policy_details'
# )

In [None]:
https://livevirtual.webex.com/livevirtual/j.php?MTID=md5f15f9eff0210a2ac223912783fe272

1. ImprovedPolicyRetriever
2. ImprovedAccuratePolicyResponder
3. ConversationManager

In [43]:
import os
import pandas as pd
import numpy as np
from pathlib import Path
from typing import List, Dict, Any, Optional

# LangChain imports (v0.3.x compatible)
from langchain_core.documents import Document
from langchain_core.vectorstores import VectorStore
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Additional imports
import warnings
warnings.filterwarnings('ignore')


In [44]:
import getpass
import os

# Prompt user for OpenAI API key securely
if "OPENAI_API_KEY" not in os.environ:
    openai_api_key = getpass.getpass("Enter your OpenAI API key: ")
    os.environ["OPENAI_API_KEY"] = os.getenv('OPENAI_API_KEY')

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [45]:
chroma_client = chromadb.CloudClient(
  api_key='ck-4B9mg6mzjdkBmh78pxKaUyCDci9DdfUUrAEbPkMXee2Z',
  tenant='43fde602-a9c4-46e5-9947-6cad654de3ca',
  database='policy_details'
)


collection = chroma_client.get_collection(name="policy_embeddings")



In [46]:
vectorstore = Chroma(
    client=chroma_client,
    collection_name="policy_embeddings",
    embedding_function=embeddings
)

# Step 5: Perform similarity search on a query set
query_set = [
    "How do I use the flexible spending account to pay for the gym?",
    "When should beneficiaries be updated for my 401k account?",
    "What are the timings for before and after school child care?",
    "How is HSA useful for retirement?",
    "Can I work from a different state?"
]

sim_scores = []
for query in query_set:
    # Perform similarity search with top 3 results
    results = vectorstore.similarity_search_with_relevance_scores(query, k=3)
    for res, score in results:
        sim_scores.append({
            'query': query,
            'Similarity': score,
            'chunk': res.page_content,
            'metadata': res.metadata
        })

# Step 6: Convert results to DataFrame
sim_scores_df = pd.DataFrame(sim_scores)

# Step 7: Display or process results
sim_scores_df.sort_values(by=['query','Similarity'], ascending=False)

Unnamed: 0,query,Similarity,chunk,metadata
3,When should beneficiaries be updated for my 40...,0.365149,"information about plan changes, fee disclosure...","{'policy': '401k-retirement-policy.pdf', 'page..."
4,When should beneficiaries be updated for my 40...,0.242315,the contribution amounts. You can opt out of a...,"{'policy': '401k-retirement-policy.pdf', 'page..."
5,When should beneficiaries be updated for my 40...,0.232506,The 401(k) plan allows loans for participants ...,"{'page': 4, 'policy': '401k-retirement-policy...."
6,What are the timings for before and after scho...,0.256837,can care for children with minor illnesses who...,"{'policy': 'childcare-policy.pdf', 'page': 3}"
7,What are the timings for before and after scho...,0.242333,Eligibility and Enrollment\nAll full-time empl...,"{'page': 5, 'policy': 'childcare-policy.pdf'}"
8,What are the timings for before and after scho...,0.241527,Our Family Services coordinator in HR holds mo...,"{'page': 6, 'policy': 'childcare-policy.pdf'}"
9,How is HSA useful for retirement?,0.105311,We also provide guidance on coordinating your ...,"{'page': 6, 'policy': '401k-retirement-policy...."
10,How is HSA useful for retirement?,0.096668,TechLance’s 401(k) retirement plan represents ...,"{'policy': '401k-retirement-policy.pdf', 'page..."
11,How is HSA useful for retirement?,0.089126,the contribution amounts. You can opt out of a...,"{'policy': '401k-retirement-policy.pdf', 'page..."
0,How do I use the flexible spending account to ...,0.333307,"In addition to traditional gym memberships, al...","{'page': 2, 'policy': 'gym-policy.pdf'}"


### Enchanced Jargon Simplifier

In [47]:
class EnhancedJargonSimplifier:
    """Simplifies policy language while maintaining accuracy and citations."""
    
    def __init__(self, model_name: str = "gpt-3.5-turbo"):
        self.llm = ChatOpenAI(model=model_name, temperature=0.2)
        
    def simplify_with_citations(self, policy_text: str, user_query: str, documents: List[Document]) -> str:
        """Simplify policy text while maintaining accuracy and adding citations."""
        
        # Get policy names for citations
        policy_names = []
        for doc in documents:
            policy_name = doc.metadata.get('policy_type', 'Unknown Policy')
            if policy_name not in policy_names:
                policy_names.append(policy_name)
        
        policy_context = ", ".join(policy_names)
        
        simplification_prompt = ChatPromptTemplate.from_messages([
            ("system", """You are an expert at simplifying complex HR policy language while maintaining complete accuracy.

            Your goals:
            1. Make complex policy text easy to understand
            2. Maintain 100% accuracy - do not change any facts, numbers, or requirements
            3. Include proper citations to specific policies
            4. Use simple, clear language
            5. Organize information with bullet points when helpful
            6. Explain technical terms in parentheses
            7. Keep all specific details like timeframes, amounts, and eligibility requirements

            Guidelines:
            - Use "you" instead of "the employee"
            - Replace jargon with simple terms but keep the meaning exact
            - Add citations like "According to the [Policy Name]..."
            - If there are conditions or exceptions, state them clearly
            - Use bullet points for lists of benefits or requirements
            - Maintain professional but friendly tone"""),
            
            ("user", """User Question: {user_query}

            Policy Information to Simplify:
            {policy_text}

            Available Policy Documents: {policy_context}

            Please simplify this information while maintaining complete accuracy and including proper citations.""")
        ])
        
        simplification_chain = simplification_prompt | self.llm | StrOutputParser()
        
        simplified = simplification_chain.invoke({
            "user_query": user_query,
            "policy_text": policy_text,
            "policy_context": policy_context
        })
        
        return simplified

# Initialize enhanced simplifier
enhanced_simplifier = EnhancedJargonSimplifier()

### 1. Improved Policy Retriever

In [48]:
class ImprovedPolicyRetriever:
    """Enhanced retrieval system with better document matching and filtering."""
    
    def __init__(self, vectorstore: VectorStore, top_k: int = 8):
        self.vectorstore = vectorstore
        self.top_k = top_k
        self.retriever = vectorstore.as_retriever(
            search_type="similarity",
            search_kwargs={"k": top_k}
        )
    
    def retrieve_relevant_policies(self, query: str) -> List[Document]:
        """Retrieve and filter the most relevant policy documents."""
        # Get initial results
        initial_results = self.retriever.invoke(query)
        
        # Filter for relevance and diversity
        filtered_results = self._filter_and_diversify_results(initial_results, query)
        
        return filtered_results
    
    def _filter_and_diversify_results(self, documents: List[Document], query: str) -> List[Document]:
        """Filter results for relevance and ensure diversity of policy types."""
        if not documents:
            return documents
        
        # Group by policy type to ensure diversity
        policy_groups = {}
        for doc in documents:
            policy_type = doc.metadata.get('policy_type', 'Unknown')
            if policy_type not in policy_groups:
                policy_groups[policy_type] = []
            policy_groups[policy_type].append(doc)
        
        # Select best documents from each relevant policy type
        final_results = []
        query_lower = query.lower()
        
        # Define policy keywords for better matching
        policy_keywords = {
            'vacation': ['vacation', 'time off', 'pto', 'leave', 'holiday'],
            '401k': ['401k', 'retirement', 'pension', 'savings', 'matching'],
            'health': ['health', 'medical', 'insurance', 'coverage', 'benefits'],
            'childcare': ['childcare', 'daycare', 'child care', 'family', 'dependent'],
            'gym': ['gym', 'fitness', 'wellness', 'exercise', 'health club'],
            'tuition': ['tuition', 'education', 'learning', 'school', 'training'],
            'work from home': ['remote', 'work from home', 'telecommute', 'wfh', 'hybrid'],
            'life insurance': ['life insurance', 'death benefit', 'beneficiary']
        }
        
        # Score policy types by relevance to query
        policy_scores = {}
        for policy_type, docs in policy_groups.items():
            score = 0
            policy_lower = policy_type.lower()
            
            # Direct policy name match
            if any(keyword in policy_lower for keyword in query_lower.split()):
                score += 10
            
            # Keyword matching
            for keyword_group, keywords in policy_keywords.items():
                if keyword_group in policy_lower:
                    if any(keyword in query_lower for keyword in keywords):
                        score += 5
            
            # Content relevance (simple keyword matching)
            for doc in docs[:2]:  # Check first 2 docs from each policy
                content_lower = doc.page_content.lower()
                if any(keyword in content_lower for keyword in query_lower.split()):
                    score += 1
            
            policy_scores[policy_type] = score
        
        # Sort policy types by relevance score and select top documents
        sorted_policies = sorted(policy_scores.items(), key=lambda x: x[1], reverse=True)
        
        # Take documents from most relevant policies
        for policy_type, score in sorted_policies:
            if score > 0:  # Only include policies with some relevance
                # Add up to 2 documents from each relevant policy type
                docs_to_add = policy_groups[policy_type][:2]
                final_results.extend(docs_to_add)
                
                # Limit total results
                if len(final_results) >= 5:
                    break
        
        return final_results[:5]  # Return top 5 most relevant documents
    
    def search_policies(self, query: str) -> Dict[str, Any]:
        """Search policies with improved filtering and return formatted results."""
        relevant_docs = self.retrieve_relevant_policies(query)
        formatted_context = self.format_retrieved_context(relevant_docs)
        
        return {
            "query": query,
            "relevant_documents": relevant_docs,
            "formatted_context": formatted_context,
            "num_sources": len(relevant_docs)
        }
    
    def format_retrieved_context(self, docs: List[Document]) -> str:
        """Format retrieved documents into a single context string."""
        if not docs:
            return "No relevant policy information found."
        
        context_parts = []
        for i, doc in enumerate(docs, 1):
            policy_type = doc.metadata.get('policy', 'Unknown Policy')
            filename = doc.metadata.get('filename', 'unknown.pdf')
            content = doc.page_content.strip()
            
            # Clean up content
            content = content.replace('\\n', ' ').replace('  ', ' ')
            
            context_parts.append(f"[Source {i} - {policy_type} ({filename})]\\n{content}")
        
        return "\\n\\n".join(context_parts)

# Initialize the improved policy retriever
improved_retriever = ImprovedPolicyRetriever(vectorstore, top_k=8)

# Test the improved retrieval system
print("🧪 Testing Improved Policy Retrieval:")
test_queries = ["Can I join the gym?","What is the vacation policy?", "How does the 401k plan work?", "What's the health insurance coverage?"]

for query in test_queries:
    print(f"\\nQuery: '{query}'")
    result = improved_retriever.search_policies(query)
    print(f"Found {result['num_sources']} relevant sources")
    
    # Show which policy types were retrieved
    policy_types = [doc.metadata.get('policy', 'Unknown') for doc in result['relevant_documents']]
    print(f"Policy types: {set(policy_types)}")
    print("-" * 50)

🧪 Testing Improved Policy Retrieval:
\nQuery: 'Can I join the gym?'
Found 2 relevant sources
Policy types: {'gym-policy.pdf'}
--------------------------------------------------
\nQuery: 'What is the vacation policy?'
Found 2 relevant sources
Policy types: {'vacation-policy.pdf'}
--------------------------------------------------
\nQuery: 'How does the 401k plan work?'
Found 2 relevant sources
Policy types: {'401k-retirement-policy.pdf'}
--------------------------------------------------
\nQuery: 'What's the health insurance coverage?'
Found 2 relevant sources
Policy types: {'health-insurance-policy.pdf'}
--------------------------------------------------


### 2. ImprovedAccuratePolicyResponder

In [49]:
class ImprovedAccuratePolicyResponder:
    """Enhanced policy responder with better accuracy and context handling."""
    
    def __init__(self, model_name: str = "gpt-4o-mini"):
        self.llm = ChatOpenAI(model=model_name, temperature=0.0)  # Zero temperature for maximum accuracy
        
    def generate_accurate_response(self, user_query: str, retrieved_documents: List[Document]) -> str:
        """Generate an accurate response with proper citations and context validation."""
        
        if not retrieved_documents:
            return "I couldn't find relevant policy information for your question. Please try rephrasing your question or contact HR directly for assistance."
        
        # Format the retrieved documents with better structure
        formatted_sources = []
        for i, doc in enumerate(retrieved_documents, 1):
            policy_name = doc.metadata.get('policy', 'Unknown Policy')
            filename = doc.metadata.get('filename', 'unknown.pdf')
            content = doc.page_content.strip()
            
            # Clean and format content
            content = self._clean_content(content)
            
            formatted_sources.append(f"===== SOURCE {i}: {policy_name} ({filename}) =====\\n{content}")
        
        sources_text = "\\n\\n".join(formatted_sources)
        
        response_prompt = ChatPromptTemplate.from_messages([
            ("system", """You are an expert HR assistant providing accurate information about company benefits policies. 
            You must provide precise, helpful responses based ONLY on the provided policy documents.

            CRITICAL REQUIREMENTS:
            1. Only use information explicitly stated in the provided policy documents
            2. If the policy documents don't contain information to answer the question, say so clearly
            3. Always cite the specific policy document when providing information
            4. Provide specific details like eligibility requirements, timeframes, amounts, and procedures
            5. Use clear, simple language while maintaining complete accuracy
            6. If multiple policies apply, reference each one appropriately
            7. If there are conditions, exceptions, or limitations, state them clearly
            8. Do not make assumptions or inferences beyond what's explicitly stated

            RESPONSE FORMAT:
            - Start with a direct answer to the user's question
            - Provide specific details with citations
            - Use bullet points for multiple items or requirements
            - Include any important conditions or exceptions
            - Reference specific policy documents by name

            CITATION FORMAT: "According to the [Policy Name]..." or "As stated in the [Policy Name]..."

            If you cannot find relevant information in the provided documents, respond with:
            "I don't have information about [specific topic] in the available policy documents. Please contact HR for assistance with this question."
            """),
            
            ("user", """User Question: {user_query}

            Policy Documents Available:
            {sources_text}

            Please provide a comprehensive and accurate answer based on the policy information provided. 
            Include specific citations and details.""")
        ])
        
        response_chain = response_prompt | self.llm | StrOutputParser()
        
        try:
            response = response_chain.invoke({
                "user_query": user_query,
                "sources_text": sources_text
            })
            
            return response.strip()
            
        except Exception as e:
            return f"I encountered an error processing your request: {str(e)}. Please try again or contact HR for assistance."
    
    def _clean_content(self, content: str) -> str:
        """Clean and format document content for better processing."""
        # Remove excessive whitespace
        content = ' '.join(content.split())
        
        # Remove common OCR artifacts
        content = content.replace('\\n', ' ')
        content = content.replace('\\t', ' ')
        content = content.replace('  ', ' ')
        
        # Ensure reasonable length
        if len(content) > 2000:
            content = content[:2000] + "..."
        
        return content.strip()
    
    def add_source_references(self, response: str, documents: List[Document]) -> str:
        """Add a clean source reference section to the response."""
        
        if not documents:
            return response
        
        # Create source references without duplicates
        source_refs = []
        seen_sources = set()
        
        for doc in documents:
            policy_name = doc.metadata.get('policy', 'Unknown Policy')
            filename = doc.metadata.get('filename', 'unknown.pdf')
            source_key = f"{policy_name} ({filename})"
            
            if source_key not in seen_sources:
                source_refs.append(source_key)
                seen_sources.add(source_key)
        
        # Add clean source section
        if source_refs:
            response += "\n\n" + "=" * 50
            response += "\n**Policy Documents Referenced:**"
            for i, source in enumerate(source_refs, 1):
                response += f"\n {i}. {source}"
        
        return response

# Initialize the improved responder
improved_responder = ImprovedAccuratePolicyResponder()

### 3. ConversationManager

In [50]:
class ConversationManager:
    """Manages conversation context and handles follow-up questions."""
    
    def __init__(self, model_name: str = "gpt-3.5-turbo"):
        self.llm = ChatOpenAI(model=model_name, temperature=0.3)
        self.conversation_history = []
        self.current_context = {}
    
    def add_to_history(self, user_query: str, response: str, context: Dict[str, Any] = None):
        """Add an interaction to conversation history."""
        interaction = {
            "timestamp": pd.Timestamp.now(),
            "user_query": user_query,
            "response": response,
            "context": context or {}
        }
        self.conversation_history.append(interaction)
        
        # Keep only last 10 interactions to manage context length
        if len(self.conversation_history) > 10:
            self.conversation_history = self.conversation_history[-10:]
    
    def format_conversation_history(self) -> str:
        """Format conversation history for context."""
        if not self.conversation_history:
            return "No previous conversation"
        
        history_text = "Previous conversation:\\n"
        for i, interaction in enumerate(self.conversation_history[-5:], 1):
            history_text += f"\nQ{i}: {interaction['user_query']}"
            history_text += f"\nA{i}: {interaction['response'][:200]}..."  # Truncate for brevity
        
        return history_text
    
    def handle_followup_question(self, 
                                user_query: str, 
                                policy_retriever: ImprovedPolicyRetriever,
                                simplifier: EnhancedJargonSimplifier) -> str:
        """Handle follow-up questions with conversation context."""
        
        followup_prompt = ChatPromptTemplate.from_messages([
            ("system", """You are an expert HR assistant handling follow-up questions about 
            company benefits. Use the conversation history to understand the context and 
            provide relevant, consistent answers.

            Guidelines:
            - Reference previous parts of the conversation when relevant
            - Maintain consistency with previous answers
            - If the question requires new policy information, indicate you'll search for it
            - If clarifying a previous answer, be more specific
            - Keep answers concise but complete"""),
            
            ("user", """Conversation History:
            {conversation_history}

            Current Question: {current_question}

            Relevant Policy Information:
            {policy_context}

            Please provide a helpful answer that considers the conversation context.""")
        ])
        
        # Get relevant policy information
        retrieval_result = policy_retriever.search_policies(user_query)
        
        # Simplify the retrieved policy text
        simplified_context = simplifier.simplify_with_context(
            retrieval_result['formatted_context'], 
            user_query
        )
        
        followup_chain = followup_prompt | self.llm | StrOutputParser()
        
        response = followup_chain.invoke({
            "conversation_history": self.format_conversation_history(),
            "current_question": user_query,
            "policy_context": simplified_context
        })
        
        # Add to history
        self.add_to_history(user_query, response, {
            "policy_sources": len(retrieval_result['relevant_documents']),
            "response_type": "followup"
        })
        
        return response
    
    def detect_followup_intent(self, user_query: str) -> bool:
        """Detect if the query is a follow-up question."""
        followup_indicators = [
            "what about", "and what", "also", "additionally", "furthermore",
            "can you explain", "what if", "how about", "in that case",
            "follow up", "more details", "elaborate", "clarify"
        ]
        
        query_lower = user_query.lower()
        return any(indicator in query_lower for indicator in followup_indicators)
    
    def clear_history(self):
        """Clear conversation history."""
        self.conversation_history = []
        self.current_context = {}

# Initialize conversation manager
conversation_manager = ConversationManager()

print("Conversation manager initialized!")
print("Sample follow-up indicators detected:")
test_queries = [
    "What about parental leave?",
    "Can you explain more about the eligibility requirements?", 
    "What if I'm part-time?",
    "How much vacation do I get?"
]

for query in test_queries:
    is_followup = conversation_manager.detect_followup_intent(query)
    print(f"'{query}' -> Follow-up: {is_followup}")

Conversation manager initialized!
Sample follow-up indicators detected:
'What about parental leave?' -> Follow-up: True
'Can you explain more about the eligibility requirements?' -> Follow-up: True
'What if I'm part-time?' -> Follow-up: True
'How much vacation do I get?' -> Follow-up: False


### 4. Complete Explainer

In [51]:
class ImprovedPolicyExplainerChatbot:
    """Enhanced Policy Explainer focused on accuracy and proper document retrieval."""
    
    def __init__(self, vectorstore: VectorStore):
        self.policy_retriever = ImprovedPolicyRetriever(vectorstore)
        self.accurate_responder = ImprovedAccuratePolicyResponder()
        self.conversation_manager = ConversationManager()
    
    def process_query(self, user_query: str) -> Dict[str, Any]:
        """Process a user query with improved accuracy and retrieval."""
        
        print(f"\\n Processing query: '{user_query}'")
        
        # Step 1: Retrieve relevant policy information with improved filtering
        print(" Retrieving relevant policies...")
        retrieval_result = self.policy_retriever.search_policies(user_query)
        
        if not retrieval_result['relevant_documents']:
            return {
                "query": user_query,
                "response": "I couldn't find relevant policy information for your question. Please try rephrasing your question or contact HR directly for assistance.",
                "sources_used": 0,
                "source_documents": []
            }
        
        # Step 2: Generate accurate response with improved processing
        print(" Generating accurate response...")
        accurate_response = self.accurate_responder.generate_accurate_response(
            user_query, 
            retrieval_result['relevant_documents']
        )
        
        # Step 3: Add source references
        final_response = self.accurate_responder.add_source_references(
            accurate_response,
            retrieval_result['relevant_documents']
        )
        
        # Step 4: Add to conversation history
        self.conversation_manager.add_to_history(
            user_query, 
            final_response,
            {"sources_used": retrieval_result['num_sources']}
        )
        
        return {
            "query": user_query,
            "response": final_response,
            "sources_used": retrieval_result['num_sources'],
            "source_documents": [doc.metadata.get('policy_type', 'Unknown') 
                               for doc in retrieval_result['relevant_documents']]
        }
    
    def handle_followup(self, user_query: str) -> str:
        """Handle follow-up questions with improved context."""
        print(f"\n Handling follow-up: '{user_query}'")
        
        # Get relevant policy information for follow-up
        retrieval_result = self.policy_retriever.search_policies(user_query)
        
        if not retrieval_result['relevant_documents']:
            return "I couldn't find relevant policy information for your follow-up question. Please try rephrasing or contact HR directly."
        
        # Generate response with improved processing
        response = self.accurate_responder.generate_accurate_response(
            user_query,
            retrieval_result['relevant_documents']
        )
        
        # Add source references
        final_response = self.accurate_responder.add_source_references(
            response,
            retrieval_result['relevant_documents']
        )
        
        # Update conversation history
        self.conversation_manager.add_to_history(user_query, final_response)
        
        return final_response
    
    def chat_session(self):
        """Start an interactive chat session with improved accuracy."""
        print(" Welcome to the TechLance Benefits Policy Explainer!")
        print("=" * 60)
        print("I provide accurate, well-cited information from your company's benefits policies.")
        print("All responses are based strictly on official policy documents.")
        print("\nType your questions about company benefits policies.")
        print("Type 'quit' to exit, 'clear' to start fresh.")
        print("-" * 60)
        
        while True:
            try:
                user_input = input("\n You: ").strip()
                
                if user_input.lower() in ['quit', 'exit', 'bye']:
                    print("\n Thank you for using the Policy Explainer! Goodbye!")
                    break
                
                elif user_input.lower() == 'clear':
                    self.conversation_manager.clear_history()
                    print("Conversation history cleared!")
                    continue
                
                elif not user_input:
                    continue
                
                # Detect if it's a follow-up question
                if (len(self.conversation_manager.conversation_history) > 0 and 
                    self.conversation_manager.detect_followup_intent(user_input)):
                    response = self.handle_followup(user_input)
                    print(f"\n Assistant: {response}")
                else:
                    result = self.process_query(user_input)
                    print(f"\n Assistant: {result['response']}")
                    if result['sources_used'] > 0:
                        print(f"\n Sources consulted: {result['sources_used']}")
                
            except KeyboardInterrupt:
                print("\n \n Session interrupted. Goodbye!")
                break
            except Exception as e:
                print(f"\n Error: {e}")
                continue

# Initialize the improved chatbot system
chatbot = ImprovedPolicyExplainerChatbot(vectorstore)

#### Testing Chatbot

In [52]:

# Test with improved system
test_queries = [
    "What is the vacation policy?",
    "How does the 401k plan work?", 
    "What health insurance is available?",
    "Can I work from home?",
    "What maternity leave benefits are available?"
]

for i, query in enumerate(test_queries, 1):
    print(f"\n--- Test Query {i} ---")
    print(f"Question: {query}")
    
    # Test improved retrieval first
    retrieval_result = chatbot.policy_retriever.search_policies(query)
    print(f"\nRetrieved {retrieval_result['num_sources']} documents")
    
    # Show policy types retrieved
    if retrieval_result['relevant_documents']:
        policy_types = [doc.metadata.get('policy', 'Unknown') for doc in retrieval_result['relevant_documents']]
        unique_types = list(set(policy_types))
        print(f"Policy types found: {unique_types}")
    
    # Test full response
    result = chatbot.process_query(query)
    print(f"\nResponse preview: {result['response'][:500]}...")
    print(f"Sources used: {result['sources_used']}")
    
    # Clear conversation for next test
    chatbot.conversation_manager.clear_history()
    print("-" * 50)




--- Test Query 1 ---
Question: What is the vacation policy?

Retrieved 2 documents
Policy types found: ['vacation-policy.pdf']
\n Processing query: 'What is the vacation policy?'
 Retrieving relevant policies...
 Generating accurate response...

Response preview: The vacation policy at TechLance provides a structured approach to vacation time for employees, emphasizing the importance of taking time off for personal well-being and productivity. Here are the key details:

### Vacation Accrual and Allocation
- **Eligibility**: The policy applies to all regular full-time and part-time employees.
- **Vacation Days**:
  - **First Year**: Employees receive 15 vacation days (120 hours).
  - **Second to Fifth Year**: Increases to 20 vacation days (160 hours).
  -...
Sources used: 2
--------------------------------------------------

--- Test Query 2 ---
Question: How does the 401k plan work?

Retrieved 2 documents
Policy types found: ['401k-retirement-policy.pdf']
\n Processing query: 'How doe

In [39]:
chatbot.chat_session()

 Welcome to the TechLance Benefits Policy Explainer!
I provide accurate, well-cited information from your company's benefits policies.
All responses are based strictly on official policy documents.

Type your questions about company benefits policies.
Type 'quit' to exit, 'clear' to start fresh.
------------------------------------------------------------



 You:  hi


\n Processing query: 'hi'
 Retrieving relevant policies...
 Generating accurate response...

 Assistant: I don't have information about greetings or general inquiries in the available policy documents. Please contact HR for assistance with this question.

**Policy Documents Referenced:**
 1. gym-policy.pdf (unknown.pdf)
 2. life-insurance-policy.pdf (unknown.pdf)

 Sources consulted: 2



 You:  how are you ?


\n Processing query: 'how are you ?'
 Retrieving relevant policies...
 Generating accurate response...

 Assistant: I don't have information about how I am in the available policy documents. Please contact HR for assistance with this question.

**Policy Documents Referenced:**
 1. health-insurance-policy.pdf (unknown.pdf)

 Sources consulted: 2



 You:  i want to go on holiday


\n Processing query: 'i want to go on holiday'
 Retrieving relevant policies...
 Generating accurate response...

 Assistant: To go on holiday, you need to follow the vacation policy outlined in the company documents. Here are the key points you should consider:

- **Eligibility to Use Vacation Time**: 
  - Newly hired employees must complete 90 days of employment before they can use any vacation time, unless it was pre-planned and approved during the hiring process or in cases of genuine emergencies that require manager approval. (According to the vacation-policy.pdf)

- **Vacation Accrual**: 
  - Vacation time accrues monthly at a rate of one-twelfth of your annual allocation. If you start on July 1st, for example, you would receive half of your annual vacation allocation for that first year, with the remainder available on your first anniversary. (According to the vacation-policy.pdf)

- **Scheduling and Approval Process**: 
  - You must provide advance notice for your vacation requ


 You:  Can i join the gym


\n Processing query: 'Can i join the gym'
 Retrieving relevant policies...
 Generating accurate response...

 Assistant: Yes, you can join the gym through the corporate gym membership program offered by TechLance.

Here are the key details regarding gym membership:

- **Corporate Discounts**: You can save between 25% to 50% on gym memberships depending on the facility tier. For example, a typical $50 monthly gym membership might cost you only $25 to $37.50 with the corporate discounts (as stated in the gym-policy.pdf).

- **Active Membership**: Generally, you can only have one active corporate membership at a time. However, you may access different locations within the same network or chain (according to the gym-policy.pdf).

- **Membership Continuation**: If you leave TechLance, the corporate discount rates will end on your last day of employment. You can typically continue your membership at regular rates or cancel according to the facility’s standard terms (as mentioned in the gym-p


 You:  quit



 Thank you for using the Policy Explainer! Goodbye!
