In [16]:
import json
import sys
from pathlib import Path
from datetime import datetime
from dotenv import load_dotenv

# Add parent directory to path and change to project root
import os

# Work out where the project lives: a kernel started inside "notebooks/"
# sits one level below the root; any other start directory is taken as the root.
notebook_dir = Path.cwd()
project_root = notebook_dir.parent if notebook_dir.name == "notebooks" else notebook_dir

# Put the root at the front of the import path and make it the working
# directory, so both imports and relative file paths resolve from there.
sys.path.insert(0, str(project_root))
os.chdir(project_root)

print(f" Working directory: {os.getcwd()}")

from src.services.llm_services import (
    load_config,
    get_llm,
    get_text_embeddings,
    validate_api_keys,
    print_config_summary
)

# Read the .env file first so its variables are in the process environment
# before the configuration is loaded and the keys are checked.
load_dotenv()

# Project settings come from the YAML file, addressed relative to the working directory.
CONFIG_PATH = "src/config/config.yaml"
config = load_config(CONFIG_PATH)

# Check the API keys with verbose output, then print the configuration summary.
validate_api_keys(config, verbose=True)
print_config_summary(config)


 Working directory: c:\Development\financial-intelligence-engine
 Config loaded:
  LLM: openrouter (openai/gpt-4o-mini)
  Embeddings: sbert / sentence-transformers/all-MiniLM-L6-v2
  Temperature: 0.2
  Artifacts: ./artifacts


In [17]:
import sentence_transformers
# Build the LLM and the text-embedding model through the llm_services factories.
llm = get_llm(config)
embeddings = get_text_embeddings(config)

print(f" LLM: {config['llm_provider']} / {config['llm_model']}")
print(f" Embeddings: {config['text_emb_provider']} / {config['text_emb_model']}")

# Smoke test: send one short prompt so a credential or connection problem
# is reported here. Failures are printed rather than raised, so the cell
# always completes.
print("\n Testing LLM API connection...")
try:
    test_response = llm.invoke("Say 'API working!' if you can read this.")
    # Use the response's `content` attribute when it has one; otherwise
    # fall back to the string form of the response.
    if hasattr(test_response, 'content'):
        test_msg = test_response.content
    else:
        test_msg = str(test_response)
    # Only the first 50 characters are shown to keep the output short.
    print(f" LLM API verified: {test_msg[:50]}")
except Exception as err:
    print(f" LLM API test failed: {err}")
    print("  Please check your .env file and API key configuration.")


 LLM: openrouter / gpt-4o-mini
 Embeddings: sbert / sentence-transformers/all-MiniLM-L6-v2

 Testing LLM API connection...
 LLM API test failed: Error code: 401 - {'error': {'message': 'No cookie auth credentials found', 'code': 401}}
  Please check your .env file and API key configuration.


In [19]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.documents import Document

# Source PDFs live in <data_root>/pdfs, where data_root comes from the loaded config.
pdf_dir = Path(config['data_root']) / 'pdfs'

# Collect the PDFs in sorted order so pages are loaded in the same sequence
# on every run (glob order is otherwise filesystem-dependent).
pdf_files = sorted(pdf_dir.glob("*.pdf"))

# Start from an empty list on every run: the summary below then works even
# when no PDF is found (previously a NameError on a fresh kernel), and a
# re-run can never report pages left over from an earlier execution.
documents = []

if not pdf_files:
    print("  No PDFs found.")
else:
    # Load every PDF and accumulate its Document objects into one flat list.
    for pdf_path in pdf_files:
        loader = PyPDFLoader(str(pdf_path))
        docs = loader.load()
        documents.extend(docs)

print(f" Loaded {len(documents)} document pages")
print(f"  Total characters: {sum(len(d.page_content) for d in documents):,}")


 Loaded 142 document pages
  Total characters: 639,466
