# SAP HANA Cloud Vector Store API Client Example

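This notebook walks through the main operations of the SAP HANA Cloud Vector Store API: checking service health, adding documents, querying by text, filtering by metadata, Maximal Marginal Relevance (MMR) search, and deleting documents. It assumes the API server is already running and reachable at the `BASE_URL` set in the first code cell.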

In [None]:
import json
import requests
import numpy as np
import pandas as pd
from typing import Dict, List, Optional, Any

# Root address of the vector store API; request URLs are formed by appending
# an endpoint path to it.
API_HOST = "localhost"
API_PORT = 8000
BASE_URL = f"http://{API_HOST}:{API_PORT}"

## Health Check

First, let's check if the API is running and connected to the database.

In [None]:
# Probe the /health endpoint before running the remaining cells.
# The timeout keeps the notebook from hanging indefinitely if the host at
# BASE_URL accepts the connection but never answers.
response = requests.get(f"{BASE_URL}/health", timeout=10)
print(f"Status Code: {response.status_code}")
response.json()

## Adding Documents

Let's add some sample documents to the vector store.

In [None]:
# Sample documents to add to the vector store
texts = [
    "SAP HANA Cloud is a cloud-based database management system.",
    "Vector search enables semantic similarity searches.",
    "LangChain is a framework for building LLM-powered applications.",
    "Integration allows SAP HANA Cloud to be used as a vector store in LangChain.",
    "SAP HANA Cloud Vector Engine provides high-performance vector search capabilities."
]

# One metadata dict per text, listed in the same order as `texts`
metadatas = [
    {"source": "docs", "topic": "database", "category": "product"},
    {"source": "docs", "topic": "search", "category": "technology"},
    {"source": "docs", "topic": "framework", "category": "technology"},
    {"source": "docs", "topic": "integration", "category": "guide"},
    {"source": "docs", "topic": "vector_engine", "category": "feature"}
]

# Catch a text/metadata mismatch locally instead of sending misaligned lists
assert len(texts) == len(metadatas), "each text needs exactly one metadata entry"

# Request body for the /texts endpoint
payload = {
    "texts": texts,
    "metadatas": metadatas
}

# Send the request; the timeout (seconds) prevents the cell from blocking
# forever if the server stops responding.
# NOTE(review): re-running this cell posts the same documents again — confirm
# whether the API de-duplicates them.
response = requests.post(f"{BASE_URL}/texts", json=payload, timeout=60)
print(f"Status Code: {response.status_code}")
response.json()

## Querying by Text

Now let's query the vector store by text.

In [None]:
# Text query: `k` is presumably the number of hits to return and `filter`
# a metadata constraint — confirm against the API schema.
query_payload = {
    "query": "How does SAP HANA Cloud work with LangChain?",
    "k": 3,
    "filter": {"source": "docs"}
}

# The timeout (seconds) prevents the cell from blocking forever on a stalled server
response = requests.post(f"{BASE_URL}/query", json=query_payload, timeout=60)
print(f"Status Code: {response.status_code}")
# Stop here with the HTTP error instead of failing later with a KeyError on
# 'results' when the server returns an error body
response.raise_for_status()
results = response.json()

# Flatten each hit into one row: text, score, then one column per metadata key
result_data = [
    {
        "content": item["document"]["page_content"],
        "score": item["score"],
        **item["document"]["metadata"],
    }
    for item in results["results"]
]

pd.DataFrame(result_data)

## Filtering by Metadata

Let's try a query with a more complex filter: an `$or` condition that accepts documents whose `category` is either `product` or `feature`.

In [None]:
# Query with an `$or` filter, intended to keep only documents whose category
# is either "product" or "feature"
query_payload = {
    "query": "SAP HANA Cloud features",
    "k": 2,
    "filter": {
        "$or": [
            {"category": "product"},
            {"category": "feature"}
        ]
    }
}

# The timeout (seconds) prevents the cell from blocking forever on a stalled server
response = requests.post(f"{BASE_URL}/query", json=query_payload, timeout=60)
print(f"Status Code: {response.status_code}")
# Stop here with the HTTP error instead of failing later with a KeyError on
# 'results' when the server returns an error body
response.raise_for_status()
results = response.json()

# Flatten each hit into one row: text, score, then one column per metadata key
result_data = [
    {
        "content": item["document"]["page_content"],
        "score": item["score"],
        **item["document"]["metadata"],
    }
    for item in results["results"]
]

pd.DataFrame(result_data)

## Maximal Marginal Relevance (MMR) Search

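Let's try an MMR search to get diverse results. MMR re-ranks the candidate documents to balance relevance to the query against diversity among the results that are returned.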

In [None]:
# MMR request parameters.
# NOTE(review): presumably `fetch_k` candidates are fetched and re-ranked down
# to `k` results, with `lambda_mult` trading relevance against diversity —
# confirm against the API schema.
mmr_payload = {
    "query": "SAP HANA Cloud",
    "k": 3,
    "fetch_k": 5,
    "lambda_mult": 0.7
}

# The timeout (seconds) prevents the cell from blocking forever on a stalled server
response = requests.post(f"{BASE_URL}/query/mmr", json=mmr_payload, timeout=60)
print(f"Status Code: {response.status_code}")
# Stop here with the HTTP error instead of failing later with a KeyError on
# 'results' when the server returns an error body
response.raise_for_status()
results = response.json()

# Flatten each hit into one row: text plus one column per metadata key
# (this cell does not tabulate a score column)
result_data = [
    {
        "content": item["document"]["page_content"],
        **item["document"]["metadata"],
    }
    for item in results["results"]
]

pd.DataFrame(result_data)

## Deleting Documents

Next, let's delete some documents by filter: every document whose `category` is `technology`.

In [None]:
# Delete request selecting documents by metadata: category == "technology"
delete_payload = {
    "filter": {"category": "technology"}
}

# The timeout (seconds) prevents the cell from blocking forever on a stalled server
response = requests.post(f"{BASE_URL}/delete", json=delete_payload, timeout=60)
print(f"Status Code: {response.status_code}")
response.json()

## Verify Deletion

Let's query again to verify the documents were deleted.

In [None]:
# Unfiltered query, so any surviving "technology" document could still appear
query_payload = {
    "query": "technology",
    "k": 5
}

# The timeout (seconds) prevents the cell from blocking forever on a stalled server
response = requests.post(f"{BASE_URL}/query", json=query_payload, timeout=60)
print(f"Status Code: {response.status_code}")
# Stop here with the HTTP error instead of failing later with a KeyError on
# 'results' when the server returns an error body
response.raise_for_status()
results = response.json()

# Flatten each hit into one row: text, score, then one column per metadata key
result_data = [
    {
        "content": item["document"]["page_content"],
        "score": item["score"],
        **item["document"]["metadata"],
    }
    for item in results["results"]
]

# Make the verification explicit instead of relying on reading the table.
# This only inspects the hits returned by this query (at most `k`).
leftover = [row for row in result_data if row.get("category") == "technology"]
assert not leftover, (
    f"{len(leftover)} document(s) with category 'technology' still returned after deletion"
)

pd.DataFrame(result_data)