# Test API Endpoints

This notebook tests the LLM, embedding, and document processing endpoints, with some prompt engineering experiments.

## Setup

In [None]:
import os
import sys
import json
import requests
import numpy as np
from dotenv import load_dotenv
import time
from typing import Dict, List

# Add parent directory to path
sys.path.append('..')

# Load environment variables
load_dotenv('../.env')

# Pretty print JSON
def print_json(data):
    """Print ``data`` to stdout as JSON, indented by two spaces."""
    rendered = json.dumps(data, indent=2)
    print(rendered)

## 1. Test Nomic Embed API

In [None]:
# Get Nomic Embed configuration from the environment
NOMIC_URL = os.getenv('NOMIC_EMBED_URL')
NOMIC_API_KEY = os.getenv('NOMIC_EMBED_API_KEY')
NOMIC_MODEL = os.getenv('NOMIC_EMBED_MODEL_NAME')

# Show the values exactly as configured, before normalization
print(f"Nomic Embed URL: {NOMIC_URL}")
print(f"Nomic Embed Model: {NOMIC_MODEL}")

# Normalize the base URL so it ends in exactly one '/v1'. Trailing slashes are
# stripped first; otherwise 'http://host/v1/' would become 'http://host/v1//v1'
# and every request path built from it would be malformed.
if NOMIC_URL:
    NOMIC_URL = NOMIC_URL.rstrip('/')
    if not NOMIC_URL.endswith('/v1'):
        NOMIC_URL = f"{NOMIC_URL}/v1"

print(f"Nomic Embed URL: {NOMIC_URL}")
print(f"Model: {NOMIC_MODEL}")

In [None]:
def get_embeddings(texts: List[str]) -> List[np.ndarray] | None:
    """Get embeddings from Nomic Embed API"""
    embeddings = []
    
    for text in texts:
        print(f"Processing text: {text}")
        response = requests.post(
            f"{NOMIC_URL}/embeddings",
            headers={
                'Authorization': f"Bearer {NOMIC_API_KEY}",
                'Content-Type': 'application/json'
            },
            json={
                'model': NOMIC_MODEL,
                'input': text
            }
        )
        
        print(f"Response: {response}")

        if response.status_code == 200:
            data = response.json()
            embedding = np.array(data['data'][0]['embedding'])
            embeddings.append(embedding)
        else:
            print(f"Error: {response.status_code} - {response.text}")
            return None
    
    return embeddings

In [None]:
# Embed a few sample sentences and compare them pairwise
test_texts = [
    "PGVector is a PostgreSQL extension for vector similarity search.",
    "Machine learning models can generate embeddings for text.",
    "The weather is nice today."
]

print("Testing embeddings...")
embeddings = get_embeddings(test_texts)
print(embeddings)

if not embeddings:
    # None (request failure) and an empty list both count as failure here
    print("\n❌ Failed to generate embeddings")
else:
    print(f"\n✅ Successfully generated {len(embeddings)} embeddings")
    print(f"Embedding dimension: {embeddings[0].shape[0]}")

    # Full pairwise similarity matrix; only the upper triangle is reported
    from sklearn.metrics.pairwise import cosine_similarity

    similarities = cosine_similarity(embeddings)
    print("\nCosine similarities:")
    text_count = len(test_texts)
    for first in range(text_count):
        for second in range(first + 1, text_count):
            score = similarities[first][second]
            print(f"Text {first+1} <-> Text {second+1}: {score:.4f}")

## 2. Test Llama 3.2 API

In [None]:
# Get Llama configuration from the environment
LLAMA_URL = os.getenv('LLAMA_3_2_URL')
LLAMA_API_KEY = os.getenv('LLAMA_3_2_API_KEY')
LLAMA_MODEL = os.getenv('LLAMA_3_2_MODEL_NAME')

# Normalize the base URL so it ends in exactly one '/v1'. Trailing slashes are
# stripped first; otherwise 'http://host/v1/' would become 'http://host/v1//v1'.
if LLAMA_URL:
    LLAMA_URL = LLAMA_URL.rstrip('/')
    if not LLAMA_URL.endswith('/v1'):
        LLAMA_URL = f"{LLAMA_URL}/v1"

print(f"Llama URL: {LLAMA_URL}")
print(f"Model: {LLAMA_MODEL}")

In [None]:
def llama_complete(prompt: str, max_tokens: int = 200, temperature: float = 0.7,
                   timeout: float = 60.0) -> str | None:
    """Get a text completion from the Llama API.

    Posts to ``{LLAMA_URL}/completions`` and returns the stripped ``text`` of
    the first choice in the JSON response.

    Args:
        prompt: Text for the model to continue.
        max_tokens: Sent as ``max_tokens`` in the request body.
        temperature: Sent as ``temperature`` in the request body.
        timeout: Seconds to wait for the request before giving up.

    Returns:
        The completion text, or ``None`` if the request fails to complete or
        the server answers with a non-200 status (the error is printed).
    """
    try:
        response = requests.post(
            f"{LLAMA_URL}/completions",
            headers={
                'Authorization': f"Bearer {LLAMA_API_KEY}",
                'Content-Type': 'application/json'
            },
            json={
                'model': LLAMA_MODEL,
                'prompt': prompt,
                'max_tokens': max_tokens,
                'temperature': temperature
            },
            # Without a timeout an unresponsive endpoint blocks the cell forever
            timeout=timeout
        )
    except requests.RequestException as exc:
        # Transport failures follow the same contract as HTTP errors
        print(f"Error: request failed - {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code} - {response.text}")
        return None

    return response.json()['choices'][0]['text'].strip()

def llama_chat(messages: List[Dict], max_tokens: int = 200, temperature: float = 0.7,
               timeout: float = 60.0) -> str | None:
    """Chat with the Llama API.

    Posts to ``{LLAMA_URL}/chat/completions`` and returns the message
    ``content`` of the first choice in the JSON response.

    Args:
        messages: Chat messages, sent unchanged as ``messages`` in the body.
        max_tokens: Sent as ``max_tokens`` in the request body.
        temperature: Sent as ``temperature`` in the request body.
        timeout: Seconds to wait for the request before giving up.

    Returns:
        The reply content, or ``None`` if the request fails to complete or
        the server answers with a non-200 status (the error is printed).
    """
    try:
        response = requests.post(
            f"{LLAMA_URL}/chat/completions",
            headers={
                'Authorization': f"Bearer {LLAMA_API_KEY}",
                'Content-Type': 'application/json'
            },
            json={
                'model': LLAMA_MODEL,
                'messages': messages,
                'max_tokens': max_tokens,
                'temperature': temperature
            },
            # Without a timeout an unresponsive endpoint blocks the cell forever
            timeout=timeout
        )
    except requests.RequestException as exc:
        # Transport failures follow the same contract as HTTP errors
        print(f"Error: request failed - {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code} - {response.text}")
        return None

    return response.json()['choices'][0]['message']['content']

In [None]:
# Smoke-test the plain completion endpoint with a single open-ended prompt
prompt = "The key benefits of using vector databases for AI applications are:"

print("Testing Llama completion...\n")
print(f"Prompt: {prompt}\n")

# Raise the token budget well above the function default for this prompt
response = llama_complete(prompt=prompt, max_tokens=1500)
if response:
    print(f"Response:\n{response}")

## 3. Prompt Engineering Experiments

In [None]:
# Experiment 1: the same question phrased in several prompt styles
prompt_styles = {
    "Direct": "What is PGVector?",

    "Instructional": "Explain what PGVector is in simple terms.",

    "Role-based": "You are a database expert. Explain what PGVector is to a beginner.",

    "Structured": """Task: Explain PGVector
Requirements:
- Use simple language
- Include key features
- Keep it under 100 words

Response:""",

    "Few-shot": """Q: What is PostgreSQL?
A: PostgreSQL is an open-source relational database management system.

Q: What is PGVector?
A:"""
}

divider = '=' * 50

print("Testing different prompt styles...\n")
for style, prompt in prompt_styles.items():
    # Long prompts are previewed as their first 100 characters plus an ellipsis
    shown = f"{prompt[:100]}..." if len(prompt) > 100 else prompt

    print(f"\n{divider}")
    print(f"Style: {style}")
    print(f"Prompt: {shown}")
    print(f"{divider}\n")

    response = llama_complete(prompt, max_tokens=100, temperature=0.5)
    if response:
        print(f"Response: {response}\n")

    # Pause between requests as simple rate limiting
    time.sleep(1)

In [None]:
# Experiment 2: run one prompt at several sampling temperatures
prompt = "Write a creative description of vector search:"
temperatures = [0.1, 0.5, 0.9, 1.5]

print("Testing temperature effects...\n")
print(f"Prompt: {prompt}\n")

rule = "-" * 40
for temp in temperatures:
    print(f"\nTemperature: {temp}")
    print(rule)

    response = llama_complete(prompt, max_tokens=80, temperature=temp)
    if response:
        print(response)

    # Pause between requests as simple rate limiting
    time.sleep(1)

In [None]:
# Experiment 3: Chain of Thought prompting
# The prompt ends mid-sentence so the model continues the step-by-step list.
cot_prompt = """Problem: I have 1000 documents and want to find the most relevant ones for a query.

Let's think step by step:
1. First, I need to"""

print("Testing Chain of Thought prompting...\n")
response = llama_complete(prompt=cot_prompt, max_tokens=200, temperature=0.7)

# The prompt is echoed only when a completion actually came back
if response:
    print(f"Prompt:\n{cot_prompt}\n")
    print(f"Response:\n{response}")

## 4. Test Docling API

In [None]:
# Get Docling configuration from the environment
DOCLING_URL = os.getenv('DOCLING_URL')
DOCLING_API_KEY = os.getenv('DOCLING_API_KEY')

# Normalize the base URL so it ends in exactly one '/v1'. Trailing slashes are
# stripped first; otherwise 'http://host/v1/' would become 'http://host/v1//v1'.
if DOCLING_URL:
    DOCLING_URL = DOCLING_URL.rstrip('/')
    if not DOCLING_URL.endswith('/v1'):
        DOCLING_URL = f"{DOCLING_URL}/v1"

print(f"Docling URL: {DOCLING_URL}")

In [None]:
# Create a small Markdown-formatted test document
test_content = """# Test Document

This is a test document for the Docling API.

## Section 1: Introduction
Vector databases are essential for modern AI applications.

## Section 2: Features
- Fast similarity search
- Scalable architecture
- Multiple distance metrics

## Section 3: Conclusion
PGVector brings vector search capabilities to PostgreSQL.
"""

# Save as a file. The encoding is explicit so the bytes on disk do not depend
# on the platform's locale default.
with open('/tmp/test_document.txt', 'w', encoding='utf-8') as f:
    f.write(test_content)

print("Created test document")

In [None]:
# Test Docling API: try the HTTP service first (file upload, then URL-based
# conversion) and fall back to the local Python library if neither works.
print("Testing Docling API...\n")

# Service root without the '/v1' suffix carried by DOCLING_URL. Every path
# below already includes its own version prefix, so request URLs are built from
# this root; appending them to DOCLING_URL would yield paths such as
# '/v1/v1alpha/convert/source'. Only the trailing '/v1' is removed, so a '/v1'
# elsewhere in the URL is left intact.
base_url = (DOCLING_URL or '').rstrip('/').removesuffix('/v1')

# First, let's test basic connectivity
print("Testing basic connectivity...")
try:
    # Try health check or basic endpoint
    health_response = requests.get(
        f"{base_url}/health",
        headers={'Authorization': f"Bearer {DOCLING_API_KEY}"},
        timeout=10
    )
    print(f"Health check status: {health_response.status_code}")
except Exception as e:
    # Best effort only: a failed health check does not stop the remaining attempts
    print(f"Health check failed: {e}")

# Try multiple endpoint patterns based on research (paths relative to the service root)
endpoints_to_try = [
    "/v1alpha/convert/source",  # Based on web research - most likely correct
    "/v1/convert/source",       # Alternative version
    "/convert/source",          # Simplified version
    "/convert",                 # Original attempt
]

success = False

for endpoint in endpoints_to_try:
    try:
        print(f"\nTrying endpoint: {base_url}{endpoint}")

        # Method 1: Try with file upload (multipart/form-data)
        with open('/tmp/test_document.txt', 'rb') as f:
            response = requests.post(
                f"{base_url}{endpoint}",
                headers={
                    'Authorization': f"Bearer {DOCLING_API_KEY}",
                    'Accept': 'application/json'
                },
                files={'file': ('test_document.txt', f, 'text/plain')},
                timeout=30
            )

        print(f"  Response status: {response.status_code}")

        if response.status_code == 200:
            result = response.json()
            print("✅ Document processed successfully!")
            print_json(result)
            success = True
            break
        elif response.status_code == 404:
            print(f"  Endpoint not found, trying next...")
            continue
        else:
            # Any other status is reported and the next endpoint is still tried
            print(f"  Error: {response.status_code} - {response.text[:200]}...")

    except Exception as e:
        print(f"  Error with endpoint {endpoint}: {e}")
        continue

# If file upload doesn't work, try JSON payload approach (for URL-based conversion)
if not success:
    print("\nTrying JSON payload approach with URL...")
    try:
        response = requests.post(
            f"{base_url}/v1alpha/convert/source",
            headers={
                'Authorization': f"Bearer {DOCLING_API_KEY}",
                'Content-Type': 'application/json',
                'Accept': 'application/json'
            },
            json={
                "http_sources": [{"url": "https://arxiv.org/pdf/2408.09869"}]  # Example PDF
            },
            timeout=60
        )

        print(f"JSON URL approach status: {response.status_code}")

        if response.status_code == 200:
            result = response.json()
            print("✅ Document processed successfully with JSON URL payload!")
            print_json(result)
            success = True
        else:
            print(f"JSON URL approach failed: {response.status_code} - {response.text[:200]}...")

    except Exception as e:
        print(f"JSON URL approach error: {e}")

# If still no success, try direct Python library approach
if not success:
    print("\nTrying direct Python library approach...")
    try:
        # Check if docling is available
        import subprocess
        import sys

        # Install docling into the running interpreter if not available
        try:
            import docling  # type: ignore
            print("Docling library already available")
        except ImportError:
            print("Installing docling...")
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'docling'])
            import docling  # type: ignore

        from docling.document_converter import DocumentConverter  # type: ignore

        # Convert document using local library
        converter = DocumentConverter()
        result = converter.convert('/tmp/test_document.txt')

        if result.status.name == 'SUCCESS':
            markdown_output = result.document.export_to_markdown()
            print("✅ Document converted successfully using local library!")
            print(f"Status: {result.status}")
            print(f"Markdown output (first 500 chars):\n{markdown_output[:500]}...")
            success = True
        else:
            print(f"Conversion failed with status: {result.status}")

    except Exception as e:
        print(f"Local library approach error: {e}")

if not success:
    print("\n❌ All Docling test approaches failed.")
    print("\nPossible issues:")
    print("1. The API endpoint URL is incorrect")
    print("2. The API key is invalid or expired")
    print("3. The service is not running or accessible")
    print("4. The API version or path has changed")
    print("5. Network connectivity issues")
    print("\nRecommendations:")
    print("- Check the API documentation for the correct endpoint")
    print("- Verify the API key is valid and has proper permissions")
    print("- Test with a simple curl command first:")
    print(f"  curl -X POST '{base_url}/v1alpha/convert/source' \\")
    # The hint references the key through a shell variable instead of printing
    # the real secret into the notebook output, which is often saved and shared.
    print("       -H \"Authorization: Bearer $DOCLING_API_KEY\" \\")
    print(f"       -H 'Content-Type: application/json' \\")
    print(f"       -d '{{\"http_sources\": [{{\"url\": \"https://arxiv.org/pdf/2408.09869\"}}]}}'")
    print("- Consider using the Python library directly if API access is not available")
    print("- Check if the service requires different authentication or headers")
else:
    print("\n✅ Docling test completed successfully!")

In [None]:
def process_document_with_docling(file_path: str) -> str | None:
    """Convert a document to Markdown with the local Docling library.

    ``docling`` is imported lazily; if the import fails it is installed with
    pip into the running interpreter and imported again. Progress and errors
    are printed rather than raised.

    Args:
        file_path: Path handed to ``DocumentConverter.convert``.

    Returns:
        The Markdown export of the converted document, or ``None`` when the
        conversion status is not ``SUCCESS`` or any exception occurs.
    """
    try:
        try:
            from docling.document_converter import DocumentConverter  # type: ignore
        except ImportError:
            print("📦 Installing docling...")
            import subprocess
            import sys
            pip_command = [sys.executable, '-m', 'pip', 'install', 'docling']
            subprocess.check_call(pip_command)
            from docling.document_converter import DocumentConverter  # type: ignore
            print("✅ Docling installed successfully")
        else:
            print("📚 Docling library is available")

        print(f"🔄 Processing document: {file_path}")

        conversion = DocumentConverter().convert(file_path)

        # Guard clause: anything other than SUCCESS is reported and yields None
        if conversion.status.name != 'SUCCESS':
            print(f"❌ Conversion failed with status: {conversion.status}")
            return None

        markdown_text = conversion.document.export_to_markdown()
        print(f"✅ Document converted successfully!")
        print(f"📄 Status: {conversion.status}")
        print(f"📏 Output length: {len(markdown_text)} characters")
        return markdown_text

    except Exception as e:
        # Covers install failures as well as errors raised during conversion
        print(f"❌ Error processing document: {e}")
        return None

# Banner printed when the cell defining the function is run
print("Testing Docling local processing...")
print("=" * 50)


In [None]:
# Create a Markdown-formatted text document for the local Docling test
import os

# Create a simple text document
text_content = """# Test Document for Docling

This is a test document to demonstrate Docling's document processing capabilities.

## Section 1: Introduction
Vector databases are essential for modern AI applications. They enable efficient similarity search and retrieval of high-dimensional data.

## Section 2: Key Features
- **Fast similarity search**: Find similar items quickly
- **Scalable architecture**: Handle large datasets efficiently  
- **Multiple distance metrics**: Support various similarity measures
- **Integration friendly**: Easy to integrate with existing systems

## Section 3: Use Cases
1. **Semantic search**: Find documents by meaning, not just keywords
2. **Recommendation systems**: Suggest similar items to users
3. **RAG applications**: Retrieve relevant context for LLM responses
4. **Image search**: Find similar images using visual embeddings

## Section 4: Conclusion
PGVector brings vector search capabilities directly to PostgreSQL, making it easier to build AI-powered applications.

### Technical Details
- Supports various vector operations
- Optimized for performance
- ACID compliance
- Familiar SQL interface

*End of document*
"""

# Save the test document. The encoding is explicit so the bytes on disk do not
# depend on the platform's locale default.
test_file_path = '/tmp/docling_test_document.txt'
with open(test_file_path, 'w', encoding='utf-8') as f:
    f.write(text_content)

print(f"📝 Created test document: {test_file_path}")
print(f"📏 Document length: {len(text_content)} characters")
print(f"📄 Document preview (first 200 chars):")
print(text_content[:200] + "...")


In [None]:
# Test Docling local processing: convert the test document, summarize the
# Markdown output, then remove the temporary file.
import re

print("🚀 Testing Docling local document processing...")
print("=" * 60)

# Process the test document
markdown_result = process_document_with_docling(test_file_path)

if markdown_result:
    print(f"\n🎉 Document processing successful!")
    print(f"📊 Processed {len(markdown_result)} characters")

    # Show the first part of the converted output
    print(f"\n📄 Converted output (first 500 characters):")
    print("-" * 50)
    print(markdown_result[:500])
    if len(markdown_result) > 500:
        print("...")
        print(f"[{len(markdown_result) - 500} more characters]")
    print("-" * 50)

    # A list item is a bullet ('-', '*', '+') or any 'N.' number followed by
    # whitespace and text. A plain prefix test on ('-', '*', '1.') misses items
    # numbered 2 and up and counts '*emphasis*' / '**bold**' lines as bullets.
    list_item_pattern = re.compile(r'\s*(?:[-*+]|\d+\.)\s+\S')

    # Show some statistics
    lines = markdown_result.split('\n')
    print(f"\n📈 Document Statistics:")
    print(f"   • Total lines: {len(lines)}")
    print(f"   • Non-empty lines: {sum(1 for line in lines if line.strip())}")
    print(f"   • Headers found: {sum(1 for line in lines if line.startswith('#'))}")
    print(f"   • List items found: {sum(1 for line in lines if list_item_pattern.match(line))}")

    print(f"\n✅ Docling local processing test completed successfully!")
    print(f"   You can now use process_document_with_docling() for document processing")

else:
    print(f"\n❌ Document processing failed")
    print(f"   Check the error messages above for troubleshooting")

# Clean up test file. Only filesystem errors are tolerated here; a bare
# 'except:' would also swallow KeyboardInterrupt and SystemExit.
try:
    os.remove(test_file_path)
    print(f"\n🧹 Cleaned up test file: {test_file_path}")
except OSError:
    print(f"\n⚠️  Could not clean up test file: {test_file_path}")


## 5. Combined RAG Pipeline Test

In [None]:
# Test a mini RAG pipeline: embed a few documents, retrieve the one closest to
# the query by cosine similarity, and ask the LLM to answer from that context.
print("Testing mini RAG pipeline...\n")

# Sample documents
documents = [
    "PGVector is a PostgreSQL extension that provides vector similarity search capabilities.",
    "Vector databases store high-dimensional vectors and enable fast similarity searches.",
    "Machine learning models convert text into numerical vectors called embeddings."
]

# Get embeddings for documents
print("1. Generating document embeddings...")
doc_embeddings = get_embeddings(documents)
if doc_embeddings is None:
    print("   Failed to generate embeddings")
    # Abort by raising SystemExit instead of calling exit(): exit() is a helper
    # meant for the interactive interpreter and is not intended for program code.
    raise SystemExit(1)
print(f"   Generated {len(doc_embeddings)} embeddings")

# Query
query = "What is PGVector used for?"
print(f"\n2. Query: {query}")

# Get query embedding
query_embeddings = get_embeddings([query])
if query_embeddings is None:
    print("   Failed to generate query embedding")
    raise SystemExit(1)
query_embedding = query_embeddings[0]

# Find most similar document
from sklearn.metrics.pairwise import cosine_similarity
# One query row against all document rows; [0] selects that single row of scores
similarities = cosine_similarity([query_embedding], doc_embeddings)[0]
# Convert the NumPy integer to a plain int before using it as a list index
best_idx = int(np.argmax(similarities))

print(f"\n3. Most relevant document (similarity: {similarities[best_idx]:.4f}):")
print(f"   {documents[best_idx]}")

# Generate answer using context
rag_prompt = f"""Context: {documents[best_idx]}

Question: {query}

Answer based on the context:"""

print("\n4. Generating answer...")
answer = llama_complete(rag_prompt, max_tokens=100, temperature=0.3)
if answer:
    print(f"\nAnswer: {answer}")