In [1]:
# Test in a Python notebook
import os
from dotenv import load_dotenv
import openai
from src.document_processor.loader import DocumentLoader
from src.document_processor.text_extractor import TextExtractor
from src.document_processor.chunker import TextChunker
from src.vector_store.embeddings import EmbeddingGenerator
from src.vector_store.store import VectorStore
from src.llm.response_generator import ResponseGenerator

# Notebook smoke test: run the same question through the retrieval +
# response pipeline twice -- once as a plain query and once with explicit
# financial-formatting instructions appended -- and print both raw responses
# so they can be compared by eye.

# Load environment variables and hand the API key to the openai module.
# os.getenv returns None when OPENAI_API_KEY is not set.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Initialize components
# NOTE(review): extractor and chunker are instantiated but not used anywhere
# in this cell; they are kept in case later cells rely on them.
loader = DocumentLoader()
extractor = TextExtractor()
chunker = TextChunker()
embedding_generator = EmbeddingGenerator()
vector_store = VectorStore(embedding_generator)
response_generator = ResponseGenerator()

# List available documents
files = loader.get_file_list()
print(f"Found {len(files)} documents:")
for file in files:
    print(f"- {file['name']} ({file['type']})")

# Test with plain query
# NOTE(review): nothing in this cell adds documents to vector_store before
# searching -- presumably the store is already populated; confirm.
test_query = "tell me about amazon 2023 performance"
search_results = vector_store.search(test_query)
result = response_generator.generate_response(test_query, search_results)

# Print raw response
print("\n=== RAW RESPONSE ===")
print(result["response"])

# Test with formatting instructions
# The last bullet previously showed the same text for the "good" and the
# "NOT" example, which made the spacing rule meaningless; the examples now
# actually differ (spaces around the values, currency symbols kept).
test_query_with_instructions = """tell me about amazon 2023 performance

Financial data formatting:
* Use abbreviated formats like "514B" for billions (NOT "514 billion")
* Use "316M" for millions (NOT "316 million")
* Keep the currency symbol as mentioned in the document like "$118B" (NOT "118B")
* keep space before and after numeric financial value like "$118B to $131B" (NOT "$118Bto$131B")
"""

# The full instruction text is used both as the search query and as the
# prompt passed to the response generator.
search_results_2 = vector_store.search(test_query_with_instructions)
result_2 = response_generator.generate_response(test_query_with_instructions, search_results_2)

# Print raw response
print("\n=== RAW RESPONSE WITH INSTRUCTIONS ===")
print(result_2["response"])

  self.langchain_db = Chroma(


Found 1 documents:
- Amazon-com-Inc-2023-Annual-Report.pdf (pdf)

=== RAW RESPONSE ===
In 2023, Amazon demonstrated strong financial performance, as detailed in their annual report. Here is a summary of their performance:

**Overview:**
- Amazon's total revenue increased by 12% year-over-year (YoY), rising from $514B to $575B.  
  [Source: Amazon com Inc 2023 Annual Report, Page: 2]

**Segment Performance:**
- **North America:** Revenue grew by 12% YoY, from $316B to $353B.
- **International:** Revenue increased by 11% YoY, from $118B to $131B.
- **AWS (Amazon Web Services):** Revenue rose by 13% YoY, from $80B to $91B. This growth was primarily driven by increased customer usage, although it was partially offset by pricing changes due to long-term customer contracts.  
  [Source: Amazon com Inc 2023 Annual Report, Page: 2, 37]

**Financial Metrics:**
- Operating income saw a significant improvement, increasing by 201% YoY from $12.2B (operating margin of 2.4%) to $36.9B (operating mar