Skip to content

Commit 2d57001

Browse files
committed
Implement Document Retrieval Services #7509
1 parent fd2dc6e commit 2d57001

5 files changed

Lines changed: 186 additions & 4 deletions

File tree

.github/ISSUE/epic-architect-knowledge-base-as-mcp.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,4 @@ We will employ a rapid and agile development approach. The scope and API specifi
3535
- `ticket-kb-implement-delete-db-service.md`: Implement the service to delete the ChromaDB collection.
3636
- `ticket-kb-implement-query-service.md`: Implement the service to query documents from the knowledge base.
3737
- `ticket-kb-implement-sync-service.md`: Implement the service to build and embed the knowledge base content.
38+
- `ticket-kb-implement-document-services.md`: Implement services to list and retrieve individual documents.
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
---
2+
title: Implement Document Retrieval Services
3+
labels: enhancement, AI
4+
---
5+
6+
Parent epic: #7501
7+
GH ticket id: #7509
8+
9+
**Phase:** 2
10+
**Assignee:** tobiu
11+
**Status:** Done
12+
13+
## Description
14+
15+
This ticket covers the implementation of the previously optional document retrieval endpoints. This will provide essential tools for inspecting and debugging the contents of the knowledge base.
16+
17+
Two distinct tools will be created:
18+
1. `list_documents`: To list documents from the collection, with support for pagination.
19+
2. `get_document_by_id`: To retrieve a single document by its unique ID.
20+
21+
## Acceptance Criteria
22+
23+
1. The `openapi.yaml` file is updated with two new endpoints:
24+
- `GET /documents`
25+
- `GET /documents/{id}`
26+
2. The endpoints have `operationId`s of `list_documents` and `get_document_by_id` respectively.
27+
3. A new `ai/mcp/server/knowledge-base/services/documentService.mjs` file is created.
28+
4. The service contains `listDocuments` and `getDocumentById` functions.
29+
5. The `toolService.mjs` `serviceMapping` is updated to point the new `operationId`s to their respective service functions.

ai/mcp/server/knowledge-base/openapi.yaml

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,79 @@ paths:
138138
schema:
139139
$ref: '#/components/schemas/ErrorResponse'
140140

141+
/documents:
142+
get:
143+
summary: List Documents
144+
operationId: list_documents
145+
x-annotations:
146+
readOnlyHint: true
147+
description: |
148+
Retrieves a paginated list of all documents from the knowledge base collection.
149+
tags: [Documents]
150+
parameters:
151+
- name: limit
152+
in: query
153+
required: false
154+
description: The maximum number of documents to return.
155+
schema:
156+
type: integer
157+
default: 100
158+
- name: offset
159+
in: query
160+
required: false
161+
description: The number of documents to skip for pagination.
162+
schema:
163+
type: integer
164+
default: 0
165+
responses:
166+
'200':
167+
description: A paginated list of documents.
168+
content:
169+
application/json:
170+
schema:
171+
$ref: '#/components/schemas/DocumentListResponse'
172+
'500':
173+
description: Internal server error.
174+
content:
175+
application/json:
176+
schema:
177+
$ref: '#/components/schemas/ErrorResponse'
178+
179+
/documents/{id}:
180+
get:
181+
summary: Get Document by ID
182+
operationId: get_document_by_id
183+
x-annotations:
184+
readOnlyHint: true
185+
description: Retrieves a single document from the collection by its unique ID.
186+
tags: [Documents]
187+
parameters:
188+
- name: id
189+
in: path
190+
required: true
191+
description: The unique ID of the document to retrieve.
192+
schema:
193+
type: string
194+
responses:
195+
'200':
196+
description: The requested document.
197+
content:
198+
application/json:
199+
schema:
200+
$ref: '#/components/schemas/Document'
201+
'404':
202+
description: Document not found.
203+
content:
204+
application/json:
205+
schema:
206+
$ref: '#/components/schemas/ErrorResponse'
207+
'500':
208+
description: Internal server error.
209+
content:
210+
application/json:
211+
schema:
212+
$ref: '#/components/schemas/ErrorResponse'
213+
141214
components:
142215
schemas:
143216
HealthCheckResponse:
@@ -195,6 +268,26 @@ components:
195268
items:
196269
$ref: '#/components/schemas/QueryResultItem'
197270

271+
Document:
272+
type: object
273+
properties:
274+
id:
275+
type: string
276+
metadata:
277+
type: object
278+
content:
279+
type: string
280+
281+
DocumentListResponse:
282+
type: object
283+
properties:
284+
count:
285+
type: integer
286+
documents:
287+
type: array
288+
items:
289+
$ref: '#/components/schemas/Document'
290+
198291
SuccessResponse:
199292
type: object
200293
properties:
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import { ChromaClient } from 'chromadb';
2+
import aiConfig from '../../../../buildScripts/ai/aiConfig.mjs';
3+
4+
/**
5+
* Retrieves one page of documents from the knowledge base collection named by `aiConfig.knowledgeBase.collectionName`.
6+
* @param {object} params - The parameters for listing documents.
7+
* @param {number} [params.limit=100] - The maximum number of documents to return.
8+
* @param {number} [params.offset=0] - The number of documents to skip for pagination.
9+
* @returns {Promise<{count: number, documents: Array<{id: string, metadata: object, content: string}>}>} Resolves to the returned page; `count` is the number of documents in this page (`documents.length`), not the total size of the collection.
10+
*/
11+
async function listDocuments({ limit = 100, offset = 0 } = {}) {
12+
// A new client is constructed (with no arguments) on every call.
const dbClient = new ChromaClient();
13+
const collection = await dbClient.getCollection({ name: aiConfig.knowledgeBase.collectionName });
14+
15+
const results = await collection.get({
16+
limit: limit,
17+
offset: offset,
18+
include: ["metadatas", "documents"]
19+
});
20+
21+
// `results` holds parallel arrays; combine them by index into one object per document.
const documents = results.ids.map((id, index) => ({
22+
id: id,
23+
metadata: results.metadatas[index],
24+
content: results.documents[index]
25+
}));
26+
27+
return {
28+
// Size of this page only; no separate total count is requested from the collection.
count: documents.length,
29+
documents: documents
30+
};
31+
}
32+
33+
/**
34+
* Retrieves a single document from the collection by its unique ID.
35+
* @param {object} params - The parameters for retrieving a document.
* @param {string} params.id - The unique ID of the document to retrieve.
36+
* @returns {Promise<{id: string, metadata: object, content: string}>} A promise that resolves to the requested document.
* @throws {Error} If the collection returns no entry for the given ID.
37+
*/
38+
async function getDocumentById({ id }) {
39+
// A new client is constructed (with no arguments) on every call.
const dbClient = new ChromaClient();
40+
const collection = await dbClient.getCollection({ name: aiConfig.knowledgeBase.collectionName });
41+
42+
const result = await collection.get({
43+
ids: [id],
44+
include: ["metadatas", "documents"]
45+
});
46+
47+
// An empty `ids` array means nothing matched; surface that as an error instead of returning undefined fields.
if (result.ids.length === 0) {
48+
throw new Error(`Document with id '${id}' not found.`);
49+
}
50+
51+
// Only a single ID was requested, so the first entry of each parallel array is the match.
return {
52+
id: result.ids[0],
53+
metadata: result.metadatas[0],
54+
content: result.documents[0]
55+
};
56+
}
57+
58+
export { listDocuments, getDocumentById };

ai/mcp/server/knowledge-base/services/toolService.mjs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import { fileURLToPath } from 'url';
77
import * as healthService from './healthService.mjs';
88
import * as databaseService from './databaseService.mjs';
99
import * as queryService from './queryService.mjs';
10+
import * as documentService from './documentService.mjs';
1011

1112
const __filename = fileURLToPath(import.meta.url);
1213
const __dirname = path.dirname(__filename);
@@ -26,15 +27,15 @@ const serviceMapping = {
2627
healthcheck : healthService.healthcheck,
2728
sync_database : databaseService.syncDatabase,
2829
delete_database: databaseService.deleteDatabase,
29-
query_documents: queryService.queryDocuments
30+
query_documents: queryService.queryDocuments,
31+
list_documents: documentService.listDocuments,
32+
get_document_by_id: documentService.getDocumentById
3033
};
3134

3235
/**
3336
* Dynamically constructs a Zod schema for a tool's input arguments based on its
3437
* OpenAPI operation definition. This schema is used for robust runtime validation
3538
* of incoming tool call arguments.
36-
* @param {object} operation - The OpenAPI operation object.
37-
* @returns {z.ZodObject} A Zod object schema representing the tool's input.
3839
*/
3940
function buildZodSchema(operation) {
4041
const shape = {};
@@ -311,7 +312,7 @@ async function callTool(toolName, args) {
311312
const validatedArgs = tool.zodSchema.parse(args);
312313

313314
// Special handling for tools that expect a single object argument.
314-
if (toolName === 'query_documents') {
315+
if (['query_documents', 'list_documents', 'get_document_by_id'].includes(toolName)) {
315316
return tool.handler(validatedArgs);
316317
}
317318

0 commit comments

Comments
 (0)