From f78785077083790de06d3bd4df9389de19d00acf Mon Sep 17 00:00:00 2001 From: Willie Ruemmele Date: Wed, 23 Jul 2025 15:13:34 -0600 Subject: [PATCH 1/3] chore: prompting for custom eval path, expected, operator --- src/commands/agent/generate/test-spec.ts | 74 ++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/src/commands/agent/generate/test-spec.ts b/src/commands/agent/generate/test-spec.ts index c843f75c..2d0ea147 100644 --- a/src/commands/agent/generate/test-spec.ts +++ b/src/commands/agent/generate/test-spec.ts @@ -25,6 +25,11 @@ type TestCase = { expectedActions: string[]; expectedTopic: string; expectedOutcome: string; + customEvaluation?: { + jsonPath: string; + expectedValue: string; + operator: string; + }; }; function castArray(value: T | T[]): T[] { @@ -42,6 +47,7 @@ function castArray(value: T | T[]): T[] { * - expectedTopic: The expected topic for classification * - expectedActions: Array of expected action names * - expectedOutcome: Expected outcome string + * - customEvaluation: Optional custom evaluation JSONpath * * @remarks * This function guides users through creating a test case by: @@ -49,6 +55,7 @@ function castArray(value: T | T[]): T[] { * 2. Selecting an expected topic (from GenAiPlugins specified in the Bot's GenAiPlannerBundle) * 3. Choosing expected actions (from GenAiFunctions in the GenAiPlannerBundle or GenAiPlugin) * 4. Defining an expected outcome + * 5. Optionally adding a custom evaluation JSONPath */ async function promptForTestCase(genAiPlugins: Record, genAiFunctions: string[]): Promise { const utterance = await input({ @@ -108,11 +115,78 @@ async function promptForTestCase(genAiPlugins: Record, genAiFunc theme, }); + const wantsCustomEvaluation = await confirm({ + message: 'Do you want to add a custom evaluation', + default: false, + theme, + }); + + let customEvaluation: { jsonPath: string; expectedValue: string; operator: string } | undefined; + if (wantsCustomEvaluation) { + /* + + + string_comparison + + operator + equals + false + + + actual + $.generatedData.invokedActions[*][?(@.function.name == 'DraftGenericReplyEmail')].function.input.recipient + true + + + expected + Jon + false + + + */ + const jsonPath = await input({ + message: 'Custom evaluation JSONPath', + validate: (d: string): boolean | string => { + if (!d.length) { + return 'JSONPath cannot be empty'; + } + return true; + }, + theme, + }); + + const expectedValue = await input({ + message: 'Expected value', + validate: (d: string): boolean | string => { + if (!d.length) { + return 'Expected value cannot be empty'; + } + return true; + }, + theme, + }); + + const operator = await select({ + message: 'Comparison operator', + choices: [ + { name: 'equals (Checks for numerical equality)', value: 'equals' }, + { name: 'greater_than_or_equal (Checks if actual >= expected)', value: 'greater_than_or_equal' }, + { name: 'greater_than (Checks if actual > expected)', value: 'greater_than' }, + { name: 'less_than (Checks if actual < expected)', value: 'less_than' }, + { name: 'less_than_or_equal (Checks if actual <= expected)', value: 'less_than_or_equal' }, + ], + theme, + }); + + customEvaluation = { jsonPath, expectedValue, operator }; + } + return { utterance, expectedTopic, expectedActions, expectedOutcome, + customEvaluation, }; } From 564dcc8aa419d9e931e14940495feaf75964c911 Mon Sep 17 00:00:00 2001 From: Willie Ruemmele Date: Fri, 25 Jul 2025 10:07:51 -0600 Subject: [PATCH 2/3] chore: move custom eval to method --- src/commands/agent/generate/test-spec.ts | 123 ++++++++++++++--------- 1 file changed, 74 insertions(+), 49 deletions(-) diff --git a/src/commands/agent/generate/test-spec.ts b/src/commands/agent/generate/test-spec.ts index 2d0ea147..9caaa275 100644 --- a/src/commands/agent/generate/test-spec.ts +++ b/src/commands/agent/generate/test-spec.ts @@ -25,11 +25,15 @@ type TestCase = { expectedActions: string[]; expectedTopic: string; expectedOutcome: string; - customEvaluation?: { - jsonPath: string; - expectedValue: string; - operator: string; - }; + customEvaluations?: Array<{ + label: string; + name: string; + parameters: Array< + | { name: 'operator'; value: string; isReference: false } + | { name: 'actual'; value: string; isReference: true } + | { name: 'expected'; value: string; isReference: boolean } + >; + }>; }; function castArray(value: T | T[]): T[] { @@ -47,7 +51,7 @@ function castArray(value: T | T[]): T[] { * - expectedTopic: The expected topic for classification * - expectedActions: Array of expected action names * - expectedOutcome: Expected outcome string - * - customEvaluation: Optional custom evaluation JSONpath + * - customEvaluations: Optional array of custom evaluation JSONpaths, names, and required information for metadata * * @remarks * This function guides users through creating a test case by: @@ -55,7 +59,7 @@ function castArray(value: T | T[]): T[] { * 2. Selecting an expected topic (from GenAiPlugins specified in the Bot's GenAiPlannerBundle) * 3. Choosing expected actions (from GenAiFunctions in the GenAiPlannerBundle or GenAiPlugin) * 4. Defining an expected outcome - * 5. Optionally adding a custom evaluation JSONPath + * 5. Optional array of custom evaluation JSONpaths, names, and required information for metadata */ async function promptForTestCase(genAiPlugins: Record, genAiFunctions: string[]): Promise { const utterance = await input({ @@ -115,46 +119,68 @@ async function promptForTestCase(genAiPlugins: Record, genAiFunc theme, }); - const wantsCustomEvaluation = await confirm({ + const customEvaluations = await promptForCustomEvaluations(); + + return { + utterance, + expectedTopic, + expectedActions, + expectedOutcome, + customEvaluations, + }; +} + +async function promptForCustomEvaluations(): Promise> { + const customEvaluations: NonNullable = []; + let wantsCustomEvaluation = await confirm({ message: 'Do you want to add a custom evaluation', default: false, theme, }); - let customEvaluation: { jsonPath: string; expectedValue: string; operator: string } | undefined; - if (wantsCustomEvaluation) { - /* - - - string_comparison - - operator - equals - false - - - actual - $.generatedData.invokedActions[*][?(@.function.name == 'DraftGenericReplyEmail')].function.input.recipient - true - - - expected - Jon - false - - - */ + // we can have multiple custom evaluations, prompt until the user is done + while (wantsCustomEvaluation) { + // eslint-disable-next-line no-await-in-loop + const label = await input({ + message: 'Custom evaluation label (descriptive name)', + validate: (d: string): boolean | string => { + if (!d.length) { + return 'Label cannot be empty'; + } + return true; + }, + theme, + }); + + // eslint-disable-next-line no-await-in-loop const jsonPath = await input({ - message: 'Custom evaluation JSONPath', + message: 'Custom evaluation JSONPath (starts with $)', validate: (d: string): boolean | string => { if (!d.length) { return 'JSONPath cannot be empty'; } + if (!d.startsWith('$')) { + return 'JSONPath must start with $'; + } return true; }, theme, }); + // eslint-disable-next-line no-await-in-loop + const operator = await select({ + message: 'Comparison operator', + choices: [ + { name: 'Equals ', value: 'equals' }, + { name: 'Greater than or equals (>=)', value: 'greater_than_or_equal' }, + { name: 'Greater than (>)', value: 'greater_than' }, + { name: 'Less than (<)', value: 'less_than' }, + { name: 'Less than or equals (<=)', value: 'less_than_or_equal' }, + ], + theme, + }); + + // eslint-disable-next-line no-await-in-loop const expectedValue = await input({ message: 'Expected value', validate: (d: string): boolean | string => { @@ -166,28 +192,27 @@ async function promptForTestCase(genAiPlugins: Record, genAiFunc theme, }); - const operator = await select({ - message: 'Comparison operator', - choices: [ - { name: 'equals (Checks for numerical equality)', value: 'equals' }, - { name: 'greater_than_or_equal (Checks if actual >= expected)', value: 'greater_than_or_equal' }, - { name: 'greater_than (Checks if actual > expected)', value: 'greater_than' }, - { name: 'less_than (Checks if actual < expected)', value: 'less_than' }, - { name: 'less_than_or_equal (Checks if actual <= expected)', value: 'less_than_or_equal' }, + customEvaluations.push({ + label, + // Determine if the expected value is numeric or string comparison + name: + !isNaN(Number(expectedValue)) && !isNaN(parseFloat(expectedValue)) ? 'numeric_comparison' : 'string_comparison', + parameters: [ + { name: 'operator', value: operator, isReference: false }, + { name: 'actual', value: jsonPath, isReference: true }, + { name: 'expected', value: expectedValue, isReference: false }, ], - theme, }); - customEvaluation = { jsonPath, expectedValue, operator }; + // eslint-disable-next-line no-await-in-loop + wantsCustomEvaluation = await confirm({ + message: 'Do you want to add another custom evaluation', + default: false, + theme, + }); } - return { - utterance, - expectedTopic, - expectedActions, - expectedOutcome, - customEvaluation, - }; + return customEvaluations; } function getMetadataFilePaths(cs: ComponentSet, type: string): Record { From 3fa8517b277e5832802bb5e9308ca80c2831010c Mon Sep 17 00:00:00 2001 From: Willie Ruemmele Date: Fri, 25 Jul 2025 14:42:25 -0600 Subject: [PATCH 3/3] refactor: move to testable methods, add UTs --- src/commands/agent/generate/test-spec.ts | 41 +++++++--- .../commands/agent/generate/test-spec.test.ts | 77 +++++++++++++++++++ 2 files changed, 106 insertions(+), 12 deletions(-) diff --git a/src/commands/agent/generate/test-spec.ts b/src/commands/agent/generate/test-spec.ts index b802bf76..2c5adb62 100644 --- a/src/commands/agent/generate/test-spec.ts +++ b/src/commands/agent/generate/test-spec.ts @@ -126,7 +126,34 @@ async function promptForTestCase(genAiPlugins: Record, genAiFunc }; } -async function promptForCustomEvaluations(): Promise> { +/** + * Creates a custom evaluation object with the provided parameters + * + * @param label - Descriptive label for the evaluation + * @param jsonPath - JSONPath for the actual value + * @param operator - Comparison operator + * @param expectedValue - Expected value to compare against + * @returns Custom evaluation object in the expected format + */ +export function createCustomEvaluation( + label: string, + jsonPath: string, + operator: string, + expectedValue: string +): NonNullable[0] { + return { + label, + name: + !isNaN(Number(expectedValue)) && !isNaN(parseFloat(expectedValue)) ? 'numeric_comparison' : 'string_comparison', + parameters: [ + { name: 'operator', value: operator, isReference: false }, + { name: 'actual', value: jsonPath, isReference: true }, + { name: 'expected', value: expectedValue, isReference: false }, + ], + }; +} + +export async function promptForCustomEvaluations(): Promise> { const customEvaluations: NonNullable = []; let wantsCustomEvaluation = await confirm({ message: 'Do you want to add a custom evaluation', @@ -188,17 +215,7 @@ async function promptForCustomEvaluations(): Promise { @@ -378,4 +379,80 @@ describe('AgentGenerateTestSpec Helper Methods', () => { expect(result).to.not.have.property('*'); }); }); + + describe('createCustomEvaluation', () => { + it('should create correct structure for string comparison', () => { + const evaluation = createCustomEvaluation('Test Label', '$.response.message', 'equals', 'expected text'); + + expect(evaluation).to.deep.equal({ + label: 'Test Label', + name: 'string_comparison', + parameters: [ + { name: 'operator', value: 'equals', isReference: false }, + { name: 'actual', value: '$.response.message', isReference: true }, + { name: 'expected', value: 'expected text', isReference: false }, + ], + }); + }); + + it('should create correct structure for numeric comparison', () => { + const evaluation = createCustomEvaluation('Numeric Test', '$.metrics.score', 'greater_than_or_equal', '85'); + + expect(evaluation).to.deep.equal({ + label: 'Numeric Test', + name: 'numeric_comparison', + parameters: [ + { name: 'operator', value: 'greater_than_or_equal', isReference: false }, + { name: 'actual', value: '$.metrics.score', isReference: true }, + { name: 'expected', value: '85', isReference: false }, + ], + }); + }); + + it('should handle all supported operators', () => { + const operators = ['equals', 'greater_than_or_equal', 'greater_than', 'less_than', 'less_than_or_equal']; + + operators.forEach((operator) => { + const evaluation = createCustomEvaluation(`Test ${operator}`, '$.test.value', operator, '100'); + + expect(evaluation.parameters[0]).to.deep.equal({ + name: 'operator', + value: operator, + isReference: false, + }); + }); + }); + + it('should always set correct isReference flags', () => { + const evaluation = createCustomEvaluation('Reference Test', '$.actual.path', 'equals', 'expected'); + + const [operatorParam, actualParam, expectedParam] = evaluation.parameters; + + expect(operatorParam.isReference).to.be.false; + expect(actualParam.isReference).to.be.true; // actual is always a reference (JSONPath) + expect(expectedParam.isReference).to.be.false; // expected is always a literal value + }); + + it('should correctly determine comparison type based on expected value', () => { + const numericEvaluation = createCustomEvaluation('Test', '$.path', 'equals', '42'); + expect(numericEvaluation.name).to.equal('numeric_comparison'); + + const stringEvaluation = createCustomEvaluation('Test', '$.path', 'equals', 'text'); + expect(stringEvaluation.name).to.equal('string_comparison'); + }); + + it('should handle complex JSONPaths and values', () => { + const evaluation = createCustomEvaluation( + 'Complex Test', + '$.response.data[0].nested["special-key"].value', + 'less_than', + '3.14159' + ); + + expect(evaluation.label).to.equal('Complex Test'); + expect(evaluation.name).to.equal('numeric_comparison'); + expect(evaluation.parameters[1].value).to.equal('$.response.data[0].nested["special-key"].value'); + expect(evaluation.parameters[2].value).to.equal('3.14159'); + }); + }); });