Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
1fc3b9c
standalone agentic loop module
nuwangeek Apr 17, 2026
622c969
fixed requested changes
nuwangeek Apr 17, 2026
cf9723e
fixed ruff format issues
nuwangeek Apr 17, 2026
d159731
Merge pull request #157 from rootcodelabs/llm-394
nuwangeek Apr 22, 2026
83c7500
complete API semantic searcher with ambiguous result handling and too…
nuwangeek Apr 22, 2026
21c3c27
Merge pull request #158 from rootcodelabs/llm-394
nuwangeek Apr 22, 2026
591b119
Merge pull request #159 from rootcodelabs/llm-345-dev
nuwangeek Apr 22, 2026
c5582f8
complete semantic searcher evaluation and update to multi point index…
nuwangeek Apr 22, 2026
f569070
Merge pull request #160 from rootcodelabs/llm-403
nuwangeek Apr 22, 2026
80bfce7
completed integration of agentic loop with semantic searcher and strea…
nuwangeek Apr 22, 2026
d71a5eb
Merge pull request #161 from rootcodelabs/llm-408
nuwangeek Apr 24, 2026
6efe48b
Implemented the API caller module
nuwangeek Apr 24, 2026
62425c1
Implemented Agentic Loop (#409)
nuwangeek Apr 29, 2026
2449472
Merge pull request #164 from buerokratt/wip
nuwangeek May 5, 2026
43e9ad3
Merge branch 'llm-345-dev' into wip
nuwangeek May 5, 2026
76cfbc4
Implement API semantic searcher with ambiguous result handling and to…
nuwangeek May 5, 2026
0ea073b
Merge pull request #167 from buerokratt/wip
nuwangeek May 6, 2026
c25838e
Semantic searcher evaluation and update to multi point indexing strat…
nuwangeek May 6, 2026
c368cfd
Merge pull request #169 from buerokratt/wip
nuwangeek May 6, 2026
c13e742
Integrate agentic loop with semantic searcher and streaming (#420)
nuwangeek May 6, 2026
bdc878c
Merge pull request #171 from buerokratt/wip
nuwangeek May 6, 2026
a385166
Merge branch 'llm-348' into wip
nuwangeek May 6, 2026
2f743a1
Implemented the API caller module (#421)
nuwangeek May 6, 2026
59b604c
Merge pull request #173 from buerokratt/wip
nuwangeek May 6, 2026
a5eb958
Integrate API caller, response formatter, with update streaming and f…
nuwangeek May 6, 2026
49e9e77
Merge pull request #175 from buerokratt/wip
nuwangeek May 6, 2026
8e7ab98
Merge branch 'ckb_integration_for_data_sync' into wip
nuwangeek May 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion constants.ini
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ DB_PASSWORD=dbadmin
RAG_SEARCH_RUUTER_PUBLIC_INTERNAL_SERVICE=http://ruuter:8086/services
SERVICE_DMAPPER_HBS=http://data-mapper:3000/hbs/rag-search
SERVICE_PROJECT_LAYER=services
CKB_RUUTER_INTERNAL=http://ruuter-internal:8089/ckb
RAG_SEARCH_LLM_SERVICE=http://llm-orchestration-service:8100
CKB_RUUTER_INTERNAL=http://ruuter-internal:8089/ckb
552 changes: 542 additions & 10 deletions docs/API_TOOL_CALLING.md

Large diffs are not rendered by default.

30 changes: 26 additions & 4 deletions src/api_tool_indexer/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,19 @@ class ApiToolIndexerConstants:
RETRY_DELAY_BASE = 2 # Exponential backoff base (2^attempt seconds)
REQUEST_TIMEOUT = 60 # seconds

# Number of example queries generated per endpoint.
# Each example becomes its own Qdrant point so its vector sits in the exact
# language region of the embedding space, enabling short-query matching.
EXAMPLE_QUERY_COUNT = 5

# Context Enrichment Template
# Used to generate a rich semantic context for each endpoint before embedding
# Full template goes in chunk_prompt; document_prompt is left empty.
#
# Multi-point indexing strategy:
# - Each example query line is extracted and stored as its own Qdrant point,
# embedded from that individual sentence alone.
# - The prose + all examples combined become one summary point.
# All in the same language as the endpoint description — no bilingual duplication.
CONTEXT_TEMPLATE = """<document>
{full_endpoint_info}
</document>
Expand All @@ -44,12 +55,23 @@ class ApiToolIndexerConstants:
</endpoint>

Please generate a rich, detailed context that describes this API endpoint comprehensively for semantic search.
Include information about:
Keep the prose context general and country-agnostic. Include information about:
- What the user wants to accomplish by calling this endpoint
- Key terms and synonyms for this action
- Related concepts and use cases
- Common ways users might ask for this functionality in natural language

IMPORTANT: Generate the context in the SAME LANGUAGE as the endpoint description above. If the description is in Estonian, respond in Estonian. If in English, respond in English. If in Russian, respond in Russian.
IMPORTANT: Generate the prose context and the example questions in the SAME LANGUAGE as the endpoint description above. However, always use the exact section header "Example queries:" in English regardless of language — this is a required machine-readable marker.

IMPORTANT for example queries: This is a system built for Estonian government digital services (Bürokratt). Ground the examples in an Estonian context — use Estonian cities (Tallinn, Tartu, Pärnu, Narva), Estonian institutions, and Estonia-relevant scenarios. Only use non-Estonian locations if the endpoint is explicitly about comparing or fetching data for multiple countries.

Then add a section with exactly {example_count} realistic and diverse example questions a real user might ask when they need this endpoint. Cover different phrasings, synonyms, and indirect ways of asking — do not just repeat the description verbatim.

Example queries:
- <example question 1>
- <example question 2>
- <example question 3>
- <example question 4>
- <example question 5>

Answer only with the enriched context and nothing else."""
Answer only with the enriched context and example queries — nothing else."""
211 changes: 161 additions & 50 deletions src/api_tool_indexer/main_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,27 @@

Receives raw API EndpointData, enriches it with LLM-generated context,
creates hybrid embeddings (dense + sparse), and stores the result in Qdrant
api_tool_collection as a single point per endpoint.
api_tool_collection as multiple points per endpoint.

Multi-point indexing strategy:
- One 'example' point per example query extracted from the LLM context.
Each query is embedded individually so its vector sits in the correct
language region of the embedding space, enabling accurate short-query matching.
- One 'summary' point containing the combined name + description + enriched context.
This handles broad/paraphrased queries that don't match any single example.

Pipeline steps:
1. Build LLM prompt from endpoint name, description, and params
2. Generate context via LLMAPIClient.generate_context()
3. Build embed text: name + description + context + param descriptions
4. Create dense embedding via LLMAPIClient.create_embedding()
5. Create sparse vector via compute_sparse_vector()
6. Delete existing Qdrant point for idempotent update
7. Upsert EnrichedEndpoint to api_tool_collection
3. Parse example query lines from the returned context
4. Create dense + sparse embeddings per example query (example points)
5. Create dense + sparse embedding for combined summary text (summary point)
6. Delete all existing Qdrant points for this endpoint (filter-based, idempotent)
7. Upsert all points to api_tool_collection
8. Return IndexingResult
"""

import re
import sys
import json
import asyncio
Expand Down Expand Up @@ -111,10 +119,13 @@ async def _generate_context_for_endpoint(

logger.info(f"params_summary : {params_summary}")

# Escape braces in the URL to prevent str.format() from treating path
# parameter templates like {id} as format placeholders (KeyError).
safe_url = endpoint_data.url.replace("{", "{{").replace("}", "}}")
full_endpoint_info = (
f"Endpoint: {endpoint_data.name}\n"
f"Method: {endpoint_data.method}\n"
f"URL: {endpoint_data.url}\n"
f"URL: {safe_url}\n"
f"Description: {endpoint_data.description}\n"
f"Parameters: {params_summary}"
)
Expand All @@ -124,15 +135,25 @@ async def _generate_context_for_endpoint(
name=endpoint_data.name,
description=endpoint_data.description,
params_summary=params_summary,
example_count=ApiToolIndexerConstants.EXAMPLE_QUERY_COUNT,
)

logger.debug(
"Generated context prompt for endpoint '{}': {} chars",
endpoint_data.endpoint_id,
len(context_prompt),
)

# Re-use the internal HTTP call of LLMAPIClient - /generate-context endpoint
# context_type="api_tool" makes context_manager use API_TOOL_CONTEXT_PROMPT,
# which passes chunk_prompt through unmodified so CHUNK_CONTEXT_PROMPT cannot
# override the instructions in CONTEXT_TEMPLATE (e.g. example query generation).
request_data = {
"document_prompt": "",
"chunk_prompt": context_prompt,
"environment": api_client.environment,
"use_cache": True,
"use_cache": False,
"connection_id": api_client.connection_id,
"context_type": "api_tool",
}

last_error = None
Expand All @@ -153,6 +174,13 @@ async def _generate_context_for_endpoint(
result = response.json()

context = result.get("context", "").strip()

logger.debug(
"context preview: {}{}",
context[:200].replace("\n", "\\n"),
"..." if len(context) > 200 else "",
)

if not context:
raise ValueError("Empty context returned from API")

Expand Down Expand Up @@ -181,9 +209,54 @@ async def _generate_context_for_endpoint(
raise RuntimeError(error_msg)


_EXAMPLE_SECTION_HEADER = re.compile(r"^example queries\s*:", re.IGNORECASE)


def _parse_example_queries(context: str) -> List[str]:
"""Extract example query lines from the LLM-generated context.

Scans for the 'Example queries:' section header and collects every
subsequent '- ' line until the section ends.

Args:
context: Raw LLM-generated context string from generate_context().

Returns:
List of example query strings, deduplicated and preserving order.
"""
examples: List[str] = []
in_section = False

for line in context.splitlines():
stripped = line.strip()
if _EXAMPLE_SECTION_HEADER.match(stripped):
in_section = True
continue
if in_section:
if stripped.startswith("- "):
examples.append(stripped[2:].strip())
elif stripped and not stripped.startswith("#"):
# Non-empty, non-comment line that isn't a list item ends the section
in_section = False

# Deduplicate preserving order
seen: set[str] = set()
unique: List[str] = []
for ex in examples:
if ex and ex not in seen:
seen.add(ex)
unique.append(ex)
return unique


async def index_endpoint(endpoint_data: EndpointData) -> IndexingResult:
"""Index one API endpoint into Qdrant api_tool_collection.

Creates multiple points per endpoint:
- One 'example' point per parsed example query, embedded from that
individual sentence so the vector sits in the correct language region.
- One 'summary' point embedded from the combined name + description + context.

Args:
endpoint_data: Raw endpoint data from mock_endpoints table.

Expand All @@ -197,7 +270,6 @@ async def index_endpoint(endpoint_data: EndpointData) -> IndexingResult:
)

try:
# Steps 1–5: LLM enrichment and embedding
async with LLMAPIClient(
api_base_url=ApiToolIndexerConstants.DEFAULT_API_BASE_URL,
environment=ApiToolIndexerConstants.DEFAULT_ENVIRONMENT,
Expand All @@ -206,92 +278,131 @@ async def index_endpoint(endpoint_data: EndpointData) -> IndexingResult:
retry_delay_base=ApiToolIndexerConstants.RETRY_DELAY_BASE,
timeout=ApiToolIndexerConstants.REQUEST_TIMEOUT,
) as api_client:
# Step 1-2: Generate LLM enriched context
logger.info("Step 1/5: Generating LLM enriched context")
# Step 1: Generate LLM enriched context (prose + example queries)
logger.info("Step 1/4: Generating LLM enriched context")
enriched_context = await _generate_context_for_endpoint(
api_client, endpoint_data
)

# Step 3: Build embed text combining all semantic signal
# Step 2: Parse example query lines from the context
example_queries = _parse_example_queries(enriched_context)
if not example_queries:
logger.warning(
f"No example queries parsed from context for endpoint '{endpoint_id}'. "
"The LLM output may not contain an 'Example queries:' section. "
"Only a summary point will be indexed — search accuracy may be reduced."
)
else:
logger.info(
f"Step 2/4: Parsed {len(example_queries)} example queries from context"
)

# Step 3: Embed each example query individually → example points
logger.info(
f"Step 3/4: Creating embeddings for {len(example_queries)} example points"
)
enriched_points: List[EnrichedEndpoint] = []

for i, example in enumerate(example_queries):
logger.debug(
f" Embedding example {i + 1}/{len(example_queries)}: "
f"'{example[:80]}{'...' if len(example) > 80 else ''}'"
)
ex_embedding = await api_client.create_embedding(example)
ex_sparse = compute_sparse_vector(example)
enriched_points.append(
EnrichedEndpoint(
endpoint_id=endpoint_id,
name=endpoint_data.name,
description=endpoint_data.description,
url=endpoint_data.url,
method=endpoint_data.method,
params=endpoint_data.params,
enriched_context=enriched_context,
service_id=endpoint_data.service_id,
point_type="example",
example_text=example,
embedding=ex_embedding,
sparse_indices=ex_sparse.indices,
sparse_values=ex_sparse.values,
)
)

# Step 4: Embed combined summary text → summary point
logger.info("Step 4/4: Creating summary point embedding")
params_summary = _build_params_summary(endpoint_data.params)
embed_text = (
summary_text = (
f"{endpoint_data.name}. "
f"{endpoint_data.description}. "
f"{enriched_context}. "
f"Parameters: {params_summary}"
)
summary_embedding = await api_client.create_embedding(summary_text)

# Step 4: Create dense embedding vector
logger.info("Step 2/5: Creating dense embedding vector")
dense_embedding = await api_client.create_embedding(embed_text)

# Step 5: Create sparse (BM25) vector - synchronous, after closing HTTP session
logger.info("Step 3/5: Computing sparse (BM25) vector")
sparse_vec = compute_sparse_vector(embed_text)

# Build EnrichedEndpoint ready for Qdrant storage
enriched = EnrichedEndpoint(
endpoint_id=endpoint_id,
name=endpoint_data.name,
description=endpoint_data.description,
url=endpoint_data.url,
method=endpoint_data.method,
params=endpoint_data.params,
enriched_context=enriched_context,
service_id=endpoint_data.service_id,
embedding=dense_embedding,
sparse_indices=sparse_vec.indices,
sparse_values=sparse_vec.values,
# Sparse vectors are CPU-bound — computed after the HTTP session closes
summary_sparse = compute_sparse_vector(summary_text)
enriched_points.append(
EnrichedEndpoint(
endpoint_id=endpoint_id,
name=endpoint_data.name,
description=endpoint_data.description,
url=endpoint_data.url,
method=endpoint_data.method,
params=endpoint_data.params,
enriched_context=enriched_context,
service_id=endpoint_data.service_id,
point_type="summary",
embedding=summary_embedding,
sparse_indices=summary_sparse.indices,
sparse_values=summary_sparse.values,
)
)

# Steps 6-7: Qdrant operations (separate try/finally ensures connection is closed)
# Qdrant operations separate block so the connection is always closed
qdrant = ApiToolQdrantManager()
try:
qdrant.connect()
qdrant.ensure_collection()

# Step 6: Delete existing point for idempotent update
logger.info("Step 4/5: Deleting existing Qdrant point (idempotent update)")
deleted = qdrant.delete_endpoint_point(endpoint_id)
# Delete all existing points for this endpoint (filter-based, idempotent)
deleted = qdrant.delete_endpoint_points(endpoint_id)
if not deleted:
logger.error(
f"Failed to delete existing Qdrant point for endpoint '{endpoint_id}'. "
f"Failed to delete existing points for endpoint '{endpoint_id}'. "
"Aborting upsert to prevent stale data."
)
return IndexingResult(
success=False,
endpoint_id=endpoint_id,
message="Qdrant delete failed before upsert",
error="delete_endpoint_point returned False",
error="delete_endpoint_points returned False",
)

# Step 7: Upsert the enriched endpoint
logger.info("Step 5/5: Upserting endpoint into api_tool_collection")
upserted = qdrant.upsert_endpoint(enriched)

upserted = qdrant.upsert_endpoint_points(enriched_points)
finally:
qdrant.close()

# Step 8: Return result
n_examples = len(example_queries)
if upserted:
logger.success(
f"Endpoint '{endpoint_id}' (name='{endpoint_data.name}') "
"indexed successfully"
f"Endpoint '{endpoint_id}' (name='{endpoint_data.name}') indexed successfully "
f"({n_examples} example points + 1 summary point)"
)
return IndexingResult(
success=True,
endpoint_id=endpoint_id,
message=(
f"Endpoint '{endpoint_data.name}' indexed successfully into "
f"api_tool_collection (dim={len(dense_embedding)})"
f"api_tool_collection "
f"({n_examples} example points + 1 summary point)"
),
)
else:
return IndexingResult(
success=False,
endpoint_id=endpoint_id,
message="Qdrant upsert failed",
error="upsert_endpoint returned False",
error="upsert_endpoint_points returned False",
)

except Exception as e:
Expand Down
Loading
Loading