# Test API Endpoints

This notebook tests the LLM, embedding, and document processing endpoints, with some prompt engineering experiments.

## Setup

In [2]:
import os
import sys
import json
import requests
import numpy as np
from dotenv import load_dotenv
import time
from typing import Dict, List

# Add parent directory to path
sys.path.append('..')

# Load environment variables
load_dotenv('../../.env')

# Pretty print JSON
def print_json(data):
    print(json.dumps(data, indent=2))

## 1. Test Nomic Embed API

In [16]:
# Get Nomic Embed configuration
NOMIC_URL = os.getenv('NOMIC_EMBED_URL')
NOMIC_API_KEY = os.getenv('NOMIC_EMBED_API_KEY')
NOMIC_MODEL = os.getenv('NOMIC_EMBED_MODEL_NAME')

print(f"Nomic Embed URL: {NOMIC_URL}")
print(f"Nomic Embed Model: {NOMIC_MODEL}")

if NOMIC_URL and not NOMIC_URL.endswith('/v1'):
    NOMIC_URL = f"{NOMIC_URL}/v1"

print(f"Nomic Embed URL: {NOMIC_URL}")
print(f"Model: {NOMIC_MODEL}")

Nomic Embed URL: https://nomic-embed-text-v1-5-maas-apicast-production.apps.prod.rhoai.rh-aiservices-bu.com:443
Nomic Embed Model: /mnt/models
Nomic Embed URL: https://nomic-embed-text-v1-5-maas-apicast-production.apps.prod.rhoai.rh-aiservices-bu.com:443/v1
Model: /mnt/models


In [4]:
def get_embeddings(texts: List[str]) -> List[np.ndarray] | None:
    """Get embeddings from Nomic Embed API"""
    embeddings = []
    
    for text in texts:
        response = requests.post(
            f"{NOMIC_URL}/embeddings",
            headers={
                'Authorization': f"Bearer {NOMIC_API_KEY}",
                'Content-Type': 'application/json'
            },
            json={
                'model': NOMIC_MODEL,
                'input': text
            }
        )
        
        if response.status_code == 200:
            data = response.json()
            embedding = np.array(data['data'][0]['embedding'])
            embeddings.append(embedding)
        else:
            print(f"Error: {response.status_code} - {response.text}")
            return None
    
    return embeddings

In [5]:
# Test embeddings
test_texts = [
    "PGVector is a PostgreSQL extension for vector similarity search.",
    "Machine learning models can generate embeddings for text.",
    "The weather is nice today."
]

print("Testing embeddings...")
embeddings = get_embeddings(test_texts)

if embeddings:
    print(f"\n✅ Successfully generated {len(embeddings)} embeddings")
    print(f"Embedding dimension: {embeddings[0].shape[0]}")
    
    # Calculate similarity between texts
    from sklearn.metrics.pairwise import cosine_similarity
    
    similarities = cosine_similarity(embeddings)
    print("\nCosine similarities:")
    for i in range(len(test_texts)):
        for j in range(i+1, len(test_texts)):
            print(f"Text {i+1} <-> Text {j+1}: {similarities[i][j]:.4f}")

Testing embeddings...

✅ Successfully generated 3 embeddings
Embedding dimension: 768

Cosine similarities:
Text 1 <-> Text 2: 0.4475
Text 1 <-> Text 3: 0.3960
Text 2 <-> Text 3: 0.4076


## 2. Test Llama 3.2 API

In [6]:
# Get Llama configuration
LLAMA_URL = os.getenv('LLAMA_3-2_URL')
LLAMA_API_KEY = os.getenv('LLAMA_3-2_API_KEY')
LLAMA_MODEL = os.getenv('LLAMA_3-2_MODEL_NAME')

if LLAMA_URL and not LLAMA_URL.endswith('/v1'):
    LLAMA_URL = f"{LLAMA_URL}/v1"

print(f"Llama URL: {LLAMA_URL}")
print(f"Model: {LLAMA_MODEL}")

Llama URL: https://llama-3-2-3b-maas-apicast-production.apps.prod.rhoai.rh-aiservices-bu.com:443/v1
Model: llama-3-2-3b


In [7]:
def llama_complete(prompt: str, max_tokens: int = 200, temperature: float = 0.7) -> str | None:
    """Get completion from Llama API"""
    response = requests.post(
        f"{LLAMA_URL}/completions",
        headers={
            'Authorization': f"Bearer {LLAMA_API_KEY}",
            'Content-Type': 'application/json'
        },
        json={
            'model': LLAMA_MODEL,
            'prompt': prompt,
            'max_tokens': max_tokens,
            'temperature': temperature
        }
    )
    
    if response.status_code == 200:
        return response.json()['choices'][0]['text'].strip()
    else:
        print(f"Error: {response.status_code} - {response.text}")
        return None

def llama_chat(messages: List[Dict], max_tokens: int = 200, temperature: float = 0.7) -> str | None:
    """Chat with Llama API"""
    response = requests.post(
        f"{LLAMA_URL}/chat/completions",
        headers={
            'Authorization': f"Bearer {LLAMA_API_KEY}",
            'Content-Type': 'application/json'
        },
        json={
            'model': LLAMA_MODEL,
            'messages': messages,
            'max_tokens': max_tokens,
            'temperature': temperature
        }
    )
    
    if response.status_code == 200:
        return response.json()['choices'][0]['message']['content']
    else:
        print(f"Error: {response.status_code} - {response.text}")
        return None

In [17]:
# Test basic completion
print("Testing Llama completion...\n")

prompt = "The key benefits of using vector databases for AI applications are:"
print(f"Prompt: {prompt}\n")

response = llama_complete(prompt, max_tokens=1500)
if response:
    print(f"Response:\n{response}")

Testing Llama completion...

Prompt: The key benefits of using vector databases for AI applications are:

Response:
(1) fast and efficient querying, (2) scalable storage, (3) efficient data retrieval, and (4) support for complex data structures. Vector databases are particularly useful for AI applications that require fast and efficient data retrieval, such as computer vision and natural language processing. Additionally, vector databases can be used to store and manage large amounts of data, making them an attractive option for big data applications.

**Example Use Cases**

*   **Computer Vision**: Vector databases can be used to store and retrieve features extracted from images, such as object detection and facial recognition features. This can be particularly useful for applications such as surveillance systems and autonomous vehicles.
*   **Natural Language Processing**: Vector databases can be used to store and retrieve word embeddings, which can be used for tasks such as text cla

## 3. Prompt Engineering Experiments

In [9]:
# Experiment 1: Different prompt styles
prompt_styles = {
    "Direct": "What is PGVector?",
    
    "Instructional": "Explain what PGVector is in simple terms.",
    
    "Role-based": "You are a database expert. Explain what PGVector is to a beginner.",
    
    "Structured": """Task: Explain PGVector
Requirements:
- Use simple language
- Include key features
- Keep it under 100 words

Response:""",
    
    "Few-shot": """Q: What is PostgreSQL?
A: PostgreSQL is an open-source relational database management system.

Q: What is PGVector?
A:"""
}

print("Testing different prompt styles...\n")
for style, prompt in prompt_styles.items():
    print(f"\n{'='*50}")
    print(f"Style: {style}")
    print(f"Prompt: {prompt[:100]}..." if len(prompt) > 100 else f"Prompt: {prompt}")
    print(f"{'='*50}\n")
    
    response = llama_complete(prompt, max_tokens=100, temperature=0.5)
    if response:
        print(f"Response: {response}\n")
    
    time.sleep(1)  # Rate limiting

Testing different prompt styles...


Style: Direct
Prompt: What is PGVector?

Response: (2023)
**Overview**

PGVector is a PostgreSQL extension that provides a vector data type, allowing users to store and manipulate vectors as a first-class data type. This extension is designed to support various use cases, including computer vision, machine learning, and data analysis.

**Key Features**
---------------

*   **Vector Data Type**: PGVector provides a built-in vector data type, enabling users to store and manipulate vectors as a first-class data type.
*   **Vector Operations**:


Style: Instructional
Prompt: Explain what PGVector is in simple terms.

Response: In the particular context of the OpenCV library.
In the simple terms, a PGVector is a type of vector data structure that is used to represent a vector of points or coordinates in a 2D or 3D space. In the context of the OpenCV library, a PGVector is used to represent a set of 2D or 3D points that are used as features or descriptors

In [10]:
# Experiment 2: Temperature effects
prompt = "Write a creative description of vector search:"
temperatures = [0.1, 0.5, 0.9, 1.5]

print("Testing temperature effects...\n")
print(f"Prompt: {prompt}\n")

for temp in temperatures:
    print(f"\nTemperature: {temp}")
    print("-" * 40)
    response = llama_complete(prompt, max_tokens=80, temperature=temp)
    if response:
        print(response)
    time.sleep(1)

Testing temperature effects...

Prompt: Write a creative description of vector search:


Temperature: 0.1
----------------------------------------
Vector search is a technique used to find the most similar vectors in a high-dimensional space. It's like a superpower that helps you find the closest neighbors in a vast, complex landscape. Imagine you're a detective trying to solve a mystery, and you have a vast collection of clues (vectors) that you need to analyze. Vector search is like having a supercomputer that can quickly scan through

Temperature: 0.5
----------------------------------------
Vector search is a type of search algorithm that uses vector spaces to find similar objects. Here's a creative description:
Imagine a vast, starry night sky filled with twinkling stars, each representing a unique object. The search algorithm is like a skilled astronomer, using a powerful telescope to scan the sky and identify patterns in the stars. As the astronomer moves the telescope across th

In [11]:
# Experiment 3: Chain of Thought prompting
cot_prompt = """Problem: I have 1000 documents and want to find the most relevant ones for a query.

Let's think step by step:
1. First, I need to"""

print("Testing Chain of Thought prompting...\n")
response = llama_complete(cot_prompt, max_tokens=200, temperature=0.7)
if response:
    print(f"Prompt:\n{cot_prompt}\n")
    print(f"Response:\n{response}")

Testing Chain of Thought prompting...

Prompt:
Problem: I have 1000 documents and want to find the most relevant ones for a query.

Let's think step by step:
1. First, I need to

Response:
convert my documents into vectors.
2. Then, I'll create a query vector that represents the query.
3. Next, I'll calculate the cosine similarity between the query vector and each document vector.
4. Finally, I'll sort the documents by their similarity scores and return the top N documents.

## Step 1: Convert documents into vectors
To convert documents into vectors, we'll use the bag-of-words (BoW) representation, which is a simple and effective method for text classification tasks. We'll split each document into words and then create a vector where the words are the features and the document's frequency is the value.

```python
import numpy as np
from collections import Counter

# Define a function to convert documents into vectors
def documents_to_vectors(documents):
    # Split documents into words

## 4. Test Docling API

In [None]:
# Get Docling configuration
DOCLING_URL = os.getenv('DOCLING_URL')
DOCLING_API_KEY = os.getenv('DOCLING_API_KEY')

if DOCLING_URL and not DOCLING_URL.endswith('/v1'):
    DOCLING_URL = f"{DOCLING_URL}/v1"

print(f"Docling URL: {DOCLING_URL}")

Docling URL: https://docling-maas-apicast-production.apps.prod.rhoai.rh-aiservices-bu.com:443


In [19]:
# Create a test document
test_content = """# Test Document

This is a test document for the Docling API.

## Section 1: Introduction
Vector databases are essential for modern AI applications.

## Section 2: Features
- Fast similarity search
- Scalable architecture
- Multiple distance metrics

## Section 3: Conclusion
PGVector brings vector search capabilities to PostgreSQL.
"""

# Save as a file
with open('/tmp/test_document.txt', 'w') as f:
    f.write(test_content)

print("Created test document")

Created test document


In [1]:
# Test Docling API
print("Testing Docling API...\n")

# First, let's test basic connectivity
print("Testing basic connectivity...")
try:
    # Try health check or basic endpoint
    base_url = DOCLING_URL.replace('/v1', '') if DOCLING_URL else ''
    health_response = requests.get(
        f"{base_url}/health",
        headers={'Authorization': f"Bearer {DOCLING_API_KEY}"},
        timeout=10
    )
    print(f"Health check status: {health_response.status_code}")
except Exception as e:
    print(f"Health check failed: {e}")

# Try multiple endpoint patterns based on research
endpoints_to_try = [
    "/v1alpha/convert/source",  # Based on web research - most likely correct
    "/v1/convert/source",       # Alternative version
    "/convert/source",          # Simplified version
    "/convert",                 # Original attempt
]

success = False

for endpoint in endpoints_to_try:
    try:
        print(f"\nTrying endpoint: {DOCLING_URL}{endpoint}")
        
        # Method 1: Try with file upload (multipart/form-data)
        with open('/tmp/test_document.txt', 'rb') as f:
            response = requests.post(
                f"{DOCLING_URL}{endpoint}",
                headers={
                    'Authorization': f"Bearer {DOCLING_API_KEY}",
                    'Accept': 'application/json'
                },
                files={'file': ('test_document.txt', f, 'text/plain')},
                timeout=30
            )
        
        print(f"  Response status: {response.status_code}")
        
        if response.status_code == 200:
            result = response.json()
            print("✅ Document processed successfully!")
            print_json(result)
            success = True
            break
        elif response.status_code == 404:
            print(f"  Endpoint not found, trying next...")
            continue
        else:
            print(f"  Error: {response.status_code} - {response.text[:200]}...")
            
    except Exception as e:
        print(f"  Error with endpoint {endpoint}: {e}")
        continue

# If file upload doesn't work, try JSON payload approach (for URL-based conversion)
if not success:
    print("\nTrying JSON payload approach with URL...")
    try:
        response = requests.post(
            f"{DOCLING_URL}/v1alpha/convert/source",
            headers={
                'Authorization': f"Bearer {DOCLING_API_KEY}",
                'Content-Type': 'application/json',
                'Accept': 'application/json'
            },
            json={
                "http_sources": [{"url": "https://arxiv.org/pdf/2408.09869"}]  # Example PDF
            },
            timeout=60
        )
        
        print(f"JSON URL approach status: {response.status_code}")
        
        if response.status_code == 200:
            result = response.json()
            print("✅ Document processed successfully with JSON URL payload!")
            print_json(result)
            success = True
        else:
            print(f"JSON URL approach failed: {response.status_code} - {response.text[:200]}...")
            
    except Exception as e:
        print(f"JSON URL approach error: {e}")

# If still no success, try direct Python library approach
if not success:
    print("\nTrying direct Python library approach...")
    try:
        # Check if docling is available
        import subprocess
        import sys
        
        # Install docling if not available
        try:
            import docling  # type: ignore
            print("Docling library already available")
        except ImportError:
            print("Installing docling...")
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'docling'])
            import docling  # type: ignore
        
        from docling.document_converter import DocumentConverter  # type: ignore
        
        # Convert document using local library
        converter = DocumentConverter()
        result = converter.convert('/tmp/test_document.txt')
        
        if result.status.name == 'SUCCESS':
            markdown_output = result.document.export_to_markdown()
            print("✅ Document converted successfully using local library!")
            print(f"Status: {result.status}")
            print(f"Markdown output (first 500 chars):\n{markdown_output[:500]}...")
            success = True
        else:
            print(f"Conversion failed with status: {result.status}")
            
    except Exception as e:
        print(f"Local library approach error: {e}")

if not success:
    print("\n❌ All Docling test approaches failed.")
    print("\nPossible issues:")
    print("1. The API endpoint URL is incorrect")
    print("2. The API key is invalid or expired")
    print("3. The service is not running or accessible")
    print("4. The API version or path has changed")
    print("5. Network connectivity issues")
    print("\nRecommendations:")
    print("- Check the API documentation for the correct endpoint")
    print("- Verify the API key is valid and has proper permissions")
    print("- Test with a simple curl command first:")
    print(f"  curl -X POST '{DOCLING_URL}/v1alpha/convert/source' \\")
    print(f"       -H 'Authorization: Bearer {DOCLING_API_KEY}' \\")
    print(f"       -H 'Content-Type: application/json' \\")
    print(f"       -d '{{\"http_sources\": [{{\"url\": \"https://arxiv.org/pdf/2408.09869\"}}]}}'")
    print("- Consider using the Python library directly if API access is not available")
    print("- Check if the service requires different authentication or headers")
else:
    print("\n✅ Docling test completed successfully!")

Testing Docling API...

Testing basic connectivity...
Health check failed: name 'DOCLING_URL' is not defined
  Error with endpoint /v1alpha/convert/source: name 'DOCLING_URL' is not defined
  Error with endpoint /v1/convert/source: name 'DOCLING_URL' is not defined
  Error with endpoint /convert/source: name 'DOCLING_URL' is not defined
  Error with endpoint /convert: name 'DOCLING_URL' is not defined

Trying JSON payload approach with URL...
JSON URL approach error: name 'requests' is not defined

Trying direct Python library approach...
Installing docling...
Collecting docling
  Downloading docling-2.41.0-py3-none-any.whl.metadata (10 kB)
Collecting docling-core<3.0.0,>=2.42.0 (from docling-core[chunking]<3.0.0,>=2.42.0->docling)
  Downloading docling_core-2.42.0-py3-none-any.whl.metadata (6.5 kB)
Collecting docling-parse<5.0.0,>=4.0.0 (from docling)
  Using cached docling_parse-4.1.0-cp311-cp311-macosx_14_0_arm64.whl.metadata (9.6 kB)
Collecting docling-ibm-models<4,>=3.6.0 (from do

Input document test_document.txt with format None does not match any allowed format: (dict_keys([<InputFormat.DOCX: 'docx'>, <InputFormat.PPTX: 'pptx'>, <InputFormat.HTML: 'html'>, <InputFormat.IMAGE: 'image'>, <InputFormat.PDF: 'pdf'>, <InputFormat.ASCIIDOC: 'asciidoc'>, <InputFormat.MD: 'md'>, <InputFormat.CSV: 'csv'>, <InputFormat.XLSX: 'xlsx'>, <InputFormat.XML_USPTO: 'xml_uspto'>, <InputFormat.XML_JATS: 'xml_jats'>, <InputFormat.JSON_DOCLING: 'json_docling'>, <InputFormat.AUDIO: 'audio'>]))


Local library approach error: File format not allowed: test_document.txt

❌ All Docling test approaches failed.

Possible issues:
1. The API endpoint URL is incorrect
2. The API key is invalid or expired
3. The service is not running or accessible
4. The API version or path has changed
5. Network connectivity issues

Recommendations:
- Check the API documentation for the correct endpoint
- Verify the API key is valid and has proper permissions
- Test with a simple curl command first:


NameError: name 'DOCLING_URL' is not defined

## 5. Combined RAG Pipeline Test

In [None]:
# Test a mini RAG pipeline
print("Testing mini RAG pipeline...\n")

# Sample documents
documents = [
    "PGVector is a PostgreSQL extension that provides vector similarity search capabilities.",
    "Vector databases store high-dimensional vectors and enable fast similarity searches.",
    "Machine learning models convert text into numerical vectors called embeddings."
]

# Get embeddings for documents
print("1. Generating document embeddings...")
doc_embeddings = get_embeddings(documents)
if doc_embeddings is None:
    print("   Failed to generate embeddings")
    exit()
print(f"   Generated {len(doc_embeddings)} embeddings")

# Query
query = "What is PGVector used for?"
print(f"\n2. Query: {query}")

# Get query embedding
query_embeddings = get_embeddings([query])
if query_embeddings is None:
    print("   Failed to generate query embedding")
    exit()
query_embedding = query_embeddings[0]

# Find most similar document
from sklearn.metrics.pairwise import cosine_similarity
similarities = cosine_similarity([query_embedding], doc_embeddings)[0]
best_idx = np.argmax(similarities)

print(f"\n3. Most relevant document (similarity: {similarities[best_idx]:.4f}):")
print(f"   {documents[best_idx]}")

# Generate answer using context
rag_prompt = f"""Context: {documents[best_idx]}

Question: {query}

Answer based on the context:"""

print("\n4. Generating answer...")
answer = llama_complete(rag_prompt, max_tokens=100, temperature=0.3)
if answer:
    print(f"\nAnswer: {answer}")

Testing mini RAG pipeline...

1. Generating document embeddings...
   Generated 3 embeddings

2. Query: What is PGVector used for?

3. Most relevant document (similarity: 0.8140):
   PGVector is a PostgreSQL extension that provides vector similarity search capabilities.

4. Generating answer...

Answer: PGVector is used for vector similarity search in PostgreSQL databases. It allows for efficient querying of large datasets based on vector similarity, enabling applications such as image and text search, recommendation systems, and more. 

Note: The question is self-contained, and the answer can be provided without referencing external information. The context provided is sufficient to understand the purpose of PGVector. 

Example use case:

Suppose we have a PostgreSQL database with a table `images` containing vectors representing images. We can use PG
