From fc9325c6564b5dc23f400a8a3ffa31dc56cfd6e3 Mon Sep 17 00:00:00 2001 From: nirinchev Date: Wed, 22 Oct 2025 14:29:46 +0200 Subject: [PATCH 1/2] chore: more accuracy test tweaks --- .../mongodb/metadata/collectionSchema.ts | 2 +- src/tools/mongodb/read/aggregate.ts | 2 +- src/tools/mongodb/read/find.ts | 2 +- tests/accuracy/collectionSchema.test.ts | 15 +++++++++++ tests/accuracy/explain.test.ts | 1 + tests/accuracy/export.test.ts | 4 ++- tests/accuracy/find.test.ts | 26 ++++++++++++++++++- tests/accuracy/getPerformanceAdvisor.test.ts | 10 +++++++ tests/accuracy/logs.test.ts | 1 + .../mongodb/metadata/collectionSchema.test.ts | 2 +- .../tools/mongodb/read/aggregate.test.ts | 3 +-- .../tools/mongodb/read/find.test.ts | 3 +-- 12 files changed, 61 insertions(+), 10 deletions(-) diff --git a/src/tools/mongodb/metadata/collectionSchema.ts b/src/tools/mongodb/metadata/collectionSchema.ts index f03e9b9d1..ad74e9e74 100644 --- a/src/tools/mongodb/metadata/collectionSchema.ts +++ b/src/tools/mongodb/metadata/collectionSchema.ts @@ -21,7 +21,7 @@ export class CollectionSchemaTool extends MongoDBToolBase { .optional() .default(ONE_MB) .describe( - `The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded.` + `The maximum number of bytes to return in the response. This value is capped by the server's configured maxBytesPerQuery and cannot be exceeded.` ), }; diff --git a/src/tools/mongodb/read/aggregate.ts b/src/tools/mongodb/read/aggregate.ts index 9ac18d357..0759b446d 100644 --- a/src/tools/mongodb/read/aggregate.ts +++ b/src/tools/mongodb/read/aggregate.ts @@ -17,7 +17,7 @@ import { LogId } from "../../../common/logger.js"; export const AggregateArgs = { pipeline: z.array(zEJSON()).describe("An array of aggregation stages to execute"), responseBytesLimit: z.number().optional().default(ONE_MB).describe(`\ -The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded. \ +The maximum number of bytes to return in the response. This value is capped by the server's configured maxBytesPerQuery and cannot be exceeded. \ Note to LLM: If the entire aggregation result is required, use the "export" tool instead of increasing this limit.\ `), }; diff --git a/src/tools/mongodb/read/find.ts b/src/tools/mongodb/read/find.ts index 09506925e..eb006f335 100644 --- a/src/tools/mongodb/read/find.ts +++ b/src/tools/mongodb/read/find.ts @@ -30,7 +30,7 @@ export const FindArgs = { "A document, describing the sort order, matching the syntax of the sort argument of cursor.sort(). The keys of the object are the fields to sort on, while the values are the sort directions (1 for ascending, -1 for descending)." ), responseBytesLimit: z.number().optional().default(ONE_MB).describe(`\ -The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded. \ +The maximum number of bytes to return in the response. This value is capped by the server's configured maxBytesPerQuery and cannot be exceeded. \ Note to LLM: If the entire query result is required, use the "export" tool instead of increasing this limit.\ `), }; diff --git a/tests/accuracy/collectionSchema.test.ts b/tests/accuracy/collectionSchema.test.ts index 8c9039bdd..49ab90bff 100644 --- a/tests/accuracy/collectionSchema.test.ts +++ b/tests/accuracy/collectionSchema.test.ts @@ -1,14 +1,26 @@ import { describeAccuracyTests } from "./sdk/describeAccuracyTests.js"; +import { Matcher } from "./sdk/matcher.js"; + +const listCollectionsOptionalCall = { + toolName: "list-collections", + parameters: { + database: "mflix", + }, + optional: true, +}; describeAccuracyTests([ { prompt: "Is there a title field in 'mflix.movies' namespace?", expectedToolCalls: [ + listCollectionsOptionalCall, { toolName: "collection-schema", parameters: { database: "mflix", collection: "movies", + sampleSize: Matcher.anyOf(Matcher.undefined, Matcher.number()), + responseBytesLimit: Matcher.anyOf(Matcher.undefined, Matcher.number()), }, }, ], @@ -16,11 +28,14 @@ describeAccuracyTests([ { prompt: "What is the type of value stored in title field in movies collection in mflix database?", expectedToolCalls: [ + listCollectionsOptionalCall, { toolName: "collection-schema", parameters: { database: "mflix", collection: "movies", + sampleSize: Matcher.anyOf(Matcher.undefined, Matcher.number()), + responseBytesLimit: Matcher.anyOf(Matcher.undefined, Matcher.number()), }, }, ], diff --git a/tests/accuracy/explain.test.ts b/tests/accuracy/explain.test.ts index 0630a6ab7..2e4625572 100644 --- a/tests/accuracy/explain.test.ts +++ b/tests/accuracy/explain.test.ts @@ -45,6 +45,7 @@ describeAccuracyTests([ $match: { release_year: 2020 }, }, ], + responseBytesLimit: Matcher.anyOf(Matcher.undefined, Matcher.number()), }, }, ], diff --git a/tests/accuracy/export.test.ts b/tests/accuracy/export.test.ts index 534f2ab6e..c46b24dac 100644 --- a/tests/accuracy/export.test.ts +++ b/tests/accuracy/export.test.ts @@ -14,7 +14,9 @@ describeAccuracyTests([ exportTarget: [ { name: "find", - arguments: {}, + arguments: { + filter: Matcher.emptyObjectOrUndefined, + }, }, ], jsonExportFormat: Matcher.anyValue, diff --git a/tests/accuracy/find.test.ts b/tests/accuracy/find.test.ts index 4b2802bbf..e402421ae 100644 --- a/tests/accuracy/find.test.ts +++ b/tests/accuracy/find.test.ts @@ -1,10 +1,27 @@ +import type { ExpectedToolCall } from "./sdk/accuracyResultStorage/resultStorage.js"; import { describeAccuracyTests } from "./sdk/describeAccuracyTests.js"; import { Matcher } from "./sdk/matcher.js"; +const optionalListCalls: (database: string) => ExpectedToolCall[] = (database) => [ + { + toolName: "list-databases", + parameters: {}, + optional: true, + }, + { + toolName: "list-collections", + parameters: { + database, + }, + optional: true, + }, +]; + describeAccuracyTests([ { prompt: "List all the movies in 'mflix.movies' namespace.", expectedToolCalls: [ + ...optionalListCalls("mflix"), { toolName: "find", parameters: { @@ -18,6 +35,7 @@ describeAccuracyTests([ { prompt: "List all the documents in 'comics.books' namespace.", expectedToolCalls: [ + ...optionalListCalls("comics"), { toolName: "find", parameters: { @@ -31,6 +49,7 @@ describeAccuracyTests([ { prompt: "Find all the movies in 'mflix.movies' namespace with runtime less than 100.", expectedToolCalls: [ + ...optionalListCalls("mflix"), { toolName: "find", parameters: { @@ -46,6 +65,7 @@ describeAccuracyTests([ { prompt: "Find all movies in 'mflix.movies' collection where director is 'Christina Collins'", expectedToolCalls: [ + ...optionalListCalls("mflix"), { toolName: "find", parameters: { @@ -61,6 +81,7 @@ describeAccuracyTests([ { prompt: "Give me all the movie titles available in 'mflix.movies' namespace", expectedToolCalls: [ + ...optionalListCalls("mflix"), { toolName: "find", parameters: { @@ -81,6 +102,7 @@ describeAccuracyTests([ { prompt: "Use 'mflix.movies' namespace to answer who were casted in the movie 'Certain Fish'", expectedToolCalls: [ + ...optionalListCalls("mflix"), { toolName: "find", parameters: { @@ -99,6 +121,7 @@ describeAccuracyTests([ { prompt: "From the mflix.movies namespace, give me first 2 movies of Horror genre sorted ascending by their runtime", expectedToolCalls: [ + ...optionalListCalls("mflix"), { toolName: "find", parameters: { @@ -112,8 +135,9 @@ describeAccuracyTests([ ], }, { - prompt: "I want a COMPLETE list of all the movies ONLY from 'mflix.movies' namespace.", + prompt: "I want an exported COMPLETE list of all the movies ONLY from 'mflix.movies' namespace.", expectedToolCalls: [ + ...optionalListCalls("mflix"), { toolName: "find", parameters: { diff --git a/tests/accuracy/getPerformanceAdvisor.test.ts b/tests/accuracy/getPerformanceAdvisor.test.ts index 02b61b33f..686748e4d 100644 --- a/tests/accuracy/getPerformanceAdvisor.test.ts +++ b/tests/accuracy/getPerformanceAdvisor.test.ts @@ -1,5 +1,6 @@ import { describeAccuracyTests } from "./sdk/describeAccuracyTests.js"; import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js"; +import { Matcher } from "./sdk/matcher.js"; // Shared mock tool implementations const mockedTools = { @@ -127,6 +128,15 @@ describeAccuracyTests([ parameters: { projectId: "mflix", clusterName: "mflix-cluster", + operations: Matcher.anyOf( + Matcher.undefined, + Matcher.value([ + "suggestedIndexes", + "dropIndexSuggestions", + "slowQueryLogs", + "schemaSuggestions", + ]) + ), }, }, ], diff --git a/tests/accuracy/logs.test.ts b/tests/accuracy/logs.test.ts index 83c9179b9..68163bdc1 100644 --- a/tests/accuracy/logs.test.ts +++ b/tests/accuracy/logs.test.ts @@ -9,6 +9,7 @@ describeAccuracyTests([ toolName: "mongodb-logs", parameters: { type: "startupWarnings", + limit: Matcher.anyOf(Matcher.undefined, Matcher.number()), }, }, ], diff --git a/tests/integration/tools/mongodb/metadata/collectionSchema.test.ts b/tests/integration/tools/mongodb/metadata/collectionSchema.test.ts index 47f117b28..f23cce19f 100644 --- a/tests/integration/tools/mongodb/metadata/collectionSchema.test.ts +++ b/tests/integration/tools/mongodb/metadata/collectionSchema.test.ts @@ -26,7 +26,7 @@ describeWithMongoDB("collectionSchema tool", (integration) => { { name: "responseBytesLimit", type: "number", - description: `The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded.`, + description: `The maximum number of bytes to return in the response. This value is capped by the server's configured maxBytesPerQuery and cannot be exceeded.`, required: false, }, ]); diff --git a/tests/integration/tools/mongodb/read/aggregate.test.ts b/tests/integration/tools/mongodb/read/aggregate.test.ts index d585d5786..0c7ce6297 100644 --- a/tests/integration/tools/mongodb/read/aggregate.test.ts +++ b/tests/integration/tools/mongodb/read/aggregate.test.ts @@ -26,8 +26,7 @@ describeWithMongoDB("aggregate tool", (integration) => { }, { name: "responseBytesLimit", - description: - 'The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded. Note to LLM: If the entire aggregation result is required, use the "export" tool instead of increasing this limit.', + description: `The maximum number of bytes to return in the response. This value is capped by the server's configured maxBytesPerQuery and cannot be exceeded. Note to LLM: If the entire aggregation result is required, use the "export" tool instead of increasing this limit.`, type: "number", required: false, }, diff --git a/tests/integration/tools/mongodb/read/find.test.ts b/tests/integration/tools/mongodb/read/find.test.ts index c466650fa..c7aaf3da9 100644 --- a/tests/integration/tools/mongodb/read/find.test.ts +++ b/tests/integration/tools/mongodb/read/find.test.ts @@ -56,8 +56,7 @@ describeWithMongoDB("find tool with default configuration", (integration) => { }, { name: "responseBytesLimit", - description: - 'The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded. Note to LLM: If the entire query result is required, use the "export" tool instead of increasing this limit.', + description: `The maximum number of bytes to return in the response. This value is capped by the server's configured maxBytesPerQuery and cannot be exceeded. Note to LLM: If the entire query result is required, use the "export" tool instead of increasing this limit.`, type: "number", required: false, }, From c447903c3fa1edc3d846b9b8e6cd5bb43e131b09 Mon Sep 17 00:00:00 2001 From: nirinchev Date: Thu, 23 Oct 2025 15:50:15 +0200 Subject: [PATCH 2/2] more tweaks --- src/tools/atlas/read/getPerformanceAdvisor.ts | 4 +++- tests/accuracy/explain.test.ts | 11 +++-------- tests/accuracy/getPerformanceAdvisor.test.ts | 2 +- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/tools/atlas/read/getPerformanceAdvisor.ts b/src/tools/atlas/read/getPerformanceAdvisor.ts index fa7ec5194..c7aead5c4 100644 --- a/src/tools/atlas/read/getPerformanceAdvisor.ts +++ b/src/tools/atlas/read/getPerformanceAdvisor.ts @@ -26,7 +26,9 @@ export class GetPerformanceAdvisorTool extends AtlasToolBase { protected description = `Get MongoDB Atlas performance advisor recommendations, which includes the operations: suggested indexes, drop index suggestions, schema suggestions, and a sample of the most recent (max ${DEFAULT_SLOW_QUERY_LOGS_LIMIT}) slow query logs`; public operationType: OperationType = "read"; protected argsShape = { - projectId: AtlasArgs.projectId().describe("Atlas project ID to get performance advisor recommendations"), + projectId: AtlasArgs.projectId().describe( + "Atlas project ID to get performance advisor recommendations. The project ID is a hexadecimal identifier of 24 characters. If the user has only specified the name, use the `atlas-list-projects` tool to retrieve the user's projects with their ids." + ), clusterName: AtlasArgs.clusterName().describe("Atlas cluster name to get performance advisor recommendations"), operations: z .array(PerformanceAdvisorOperationType) diff --git a/tests/accuracy/explain.test.ts b/tests/accuracy/explain.test.ts index 2e4625572..cac7ba5b6 100644 --- a/tests/accuracy/explain.test.ts +++ b/tests/accuracy/explain.test.ts @@ -1,11 +1,6 @@ import { describeAccuracyTests } from "./sdk/describeAccuracyTests.js"; import { Matcher } from "./sdk/matcher.js"; -/** - * None of these tests score a parameter match on any of the models, likely - * because we are using Zod.union, when we probably should've used - * Zod.discriminatedUnion - */ describeAccuracyTests([ { prompt: `Will fetching documents, where release_year is 2020, from 'mflix.movies' namespace perform a collection scan?`, @@ -23,7 +18,7 @@ describeAccuracyTests([ }, }, ], - verbosity: Matcher.string(), + verbosity: Matcher.anyOf(Matcher.string(), Matcher.undefined), }, }, ], @@ -49,7 +44,7 @@ describeAccuracyTests([ }, }, ], - verbosity: Matcher.string(), + verbosity: Matcher.anyOf(Matcher.string(), Matcher.undefined), }, }, ], @@ -70,7 +65,7 @@ describeAccuracyTests([ }, }, ], - verbosity: Matcher.string(), + verbosity: Matcher.anyOf(Matcher.string(), Matcher.undefined), }, }, ], diff --git a/tests/accuracy/getPerformanceAdvisor.test.ts b/tests/accuracy/getPerformanceAdvisor.test.ts index 768b035d0..ee286f98c 100644 --- a/tests/accuracy/getPerformanceAdvisor.test.ts +++ b/tests/accuracy/getPerformanceAdvisor.test.ts @@ -79,7 +79,7 @@ describeAccuracyTests([ }, // Test for Drop Index Suggestions operation { - prompt: "Show me drop index suggestions for the 'mflix' project and 'mflix-cluster' cluster", + prompt: "Show me drop index suggestions for the project named 'mflix' and 'mflix-cluster' cluster", expectedToolCalls: [ ...listProjectsAndClustersToolCalls, {