diff --git a/src/commands/agent/generate/test-spec.ts b/src/commands/agent/generate/test-spec.ts index 4f58f23e..2c5adb62 100644 --- a/src/commands/agent/generate/test-spec.ts +++ b/src/commands/agent/generate/test-spec.ts @@ -26,6 +26,15 @@ type TestCase = { expectedActions: string[]; expectedTopic: string; expectedOutcome: string; + customEvaluations?: Array<{ + label: string; + name: string; + parameters: Array< + | { name: 'operator'; value: string; isReference: false } + | { name: 'actual'; value: string; isReference: true } + | { name: 'expected'; value: string; isReference: boolean } + >; + }>; }; /** @@ -38,6 +47,7 @@ type TestCase = { * - expectedTopic: The expected topic for classification * - expectedActions: Array of expected action names * - expectedOutcome: Expected outcome string + * - customEvaluations: Optional array of custom evaluation JSONpaths, names, and required information for metadata * * @remarks * This function guides users through creating a test case by: @@ -45,6 +55,7 @@ type TestCase = { * 2. Selecting an expected topic (from GenAiPlugins specified in the Bot's GenAiPlannerBundle) * 3. Choosing expected actions (from GenAiFunctions in the GenAiPlannerBundle or GenAiPlugin) * 4. Defining an expected outcome + * 5. 
Optionally adding custom evaluations (label, JSONPath, comparison operator, and expected value) */ async function promptForTestCase(genAiPlugins: Record, genAiFunctions: string[]): Promise { const utterance = await input({ @@ -104,14 +115,119 @@ async function promptForTestCase(genAiPlugins: Record, genAiFunc theme, }); + const customEvaluations = await promptForCustomEvaluations(); + return { utterance, expectedTopic, expectedActions, expectedOutcome, + customEvaluations, + }; +} + +/** + * Creates a custom evaluation object with the provided parameters + * + * @param label - Descriptive label for the evaluation + * @param jsonPath - JSONPath for the actual value + * @param operator - Comparison operator + * @param expectedValue - Expected value to compare against + * @returns Custom evaluation object in the expected format + */ +export function createCustomEvaluation( + label: string, + jsonPath: string, + operator: string, + expectedValue: string +): NonNullable[0] { + return { + label, + name: + !isNaN(Number(expectedValue)) && !isNaN(parseFloat(expectedValue)) ? 
'numeric_comparison' : 'string_comparison', + parameters: [ + { name: 'operator', value: operator, isReference: false }, + { name: 'actual', value: jsonPath, isReference: true }, + { name: 'expected', value: expectedValue, isReference: false }, + ], }; } +export async function promptForCustomEvaluations(): Promise> { + const customEvaluations: NonNullable = []; + let wantsCustomEvaluation = await confirm({ + message: 'Do you want to add a custom evaluation', + default: false, + theme, + }); + + // we can have multiple custom evaluations, prompt until the user is done + while (wantsCustomEvaluation) { + // eslint-disable-next-line no-await-in-loop + const label = await input({ + message: 'Custom evaluation label (descriptive name)', + validate: (d: string): boolean | string => { + if (!d.length) { + return 'Label cannot be empty'; + } + return true; + }, + theme, + }); + + // eslint-disable-next-line no-await-in-loop + const jsonPath = await input({ + message: 'Custom evaluation JSONPath (starts with $)', + validate: (d: string): boolean | string => { + if (!d.length) { + return 'JSONPath cannot be empty'; + } + if (!d.startsWith('$')) { + return 'JSONPath must start with $'; + } + return true; + }, + theme, + }); + + // eslint-disable-next-line no-await-in-loop + const operator = await select({ + message: 'Comparison operator', + choices: [ + { name: 'Equals ', value: 'equals' }, + { name: 'Greater than or equals (>=)', value: 'greater_than_or_equal' }, + { name: 'Greater than (>)', value: 'greater_than' }, + { name: 'Less than (<)', value: 'less_than' }, + { name: 'Less than or equals (<=)', value: 'less_than_or_equal' }, + ], + theme, + }); + + // eslint-disable-next-line no-await-in-loop + const expectedValue = await input({ + message: 'Expected value', + validate: (d: string): boolean | string => { + if (!d.length) { + return 'Expected value cannot be empty'; + } + return true; + }, + theme, + }); + + customEvaluations.push(createCustomEvaluation(label, 
jsonPath, operator, expectedValue)); + + // eslint-disable-next-line no-await-in-loop + wantsCustomEvaluation = await confirm({ + message: 'Do you want to add another custom evaluation', + default: false, + theme, + }); + } + + return customEvaluations; +} + export function getMetadataFilePaths(cs: ComponentSet, type: string): Record { return [...cs.filter((component) => component.type.name === type && component.fullName !== '*')].reduce< Record diff --git a/test/commands/agent/generate/test-spec.test.ts b/test/commands/agent/generate/test-spec.test.ts index 395b0a96..3b850dbe 100644 --- a/test/commands/agent/generate/test-spec.test.ts +++ b/test/commands/agent/generate/test-spec.test.ts @@ -16,6 +16,7 @@ import { ensureYamlExtension, getMetadataFilePaths, getPluginsAndFunctions, + createCustomEvaluation, } from '../../../../src/commands/agent/generate/test-spec.js'; describe('AgentGenerateTestSpec Helper Methods', () => { @@ -378,4 +379,80 @@ describe('AgentGenerateTestSpec Helper Methods', () => { expect(result).to.not.have.property('*'); }); }); + + describe('createCustomEvaluation', () => { + it('should create correct structure for string comparison', () => { + const evaluation = createCustomEvaluation('Test Label', '$.response.message', 'equals', 'expected text'); + + expect(evaluation).to.deep.equal({ + label: 'Test Label', + name: 'string_comparison', + parameters: [ + { name: 'operator', value: 'equals', isReference: false }, + { name: 'actual', value: '$.response.message', isReference: true }, + { name: 'expected', value: 'expected text', isReference: false }, + ], + }); + }); + + it('should create correct structure for numeric comparison', () => { + const evaluation = createCustomEvaluation('Numeric Test', '$.metrics.score', 'greater_than_or_equal', '85'); + + expect(evaluation).to.deep.equal({ + label: 'Numeric Test', + name: 'numeric_comparison', + parameters: [ + { name: 'operator', value: 'greater_than_or_equal', isReference: false }, + { name: 
'actual', value: '$.metrics.score', isReference: true }, + { name: 'expected', value: '85', isReference: false }, + ], + }); + }); + + it('should handle all supported operators', () => { + const operators = ['equals', 'greater_than_or_equal', 'greater_than', 'less_than', 'less_than_or_equal']; + + operators.forEach((operator) => { + const evaluation = createCustomEvaluation(`Test ${operator}`, '$.test.value', operator, '100'); + + expect(evaluation.parameters[0]).to.deep.equal({ + name: 'operator', + value: operator, + isReference: false, + }); + }); + }); + + it('should always set correct isReference flags', () => { + const evaluation = createCustomEvaluation('Reference Test', '$.actual.path', 'equals', 'expected'); + + const [operatorParam, actualParam, expectedParam] = evaluation.parameters; + + expect(operatorParam.isReference).to.be.false; + expect(actualParam.isReference).to.be.true; // actual is always a reference (JSONPath) + expect(expectedParam.isReference).to.be.false; // expected is always a literal value + }); + + it('should correctly determine comparison type based on expected value', () => { + const numericEvaluation = createCustomEvaluation('Test', '$.path', 'equals', '42'); + expect(numericEvaluation.name).to.equal('numeric_comparison'); + + const stringEvaluation = createCustomEvaluation('Test', '$.path', 'equals', 'text'); + expect(stringEvaluation.name).to.equal('string_comparison'); + }); + + it('should handle complex JSONPaths and values', () => { + const evaluation = createCustomEvaluation( + 'Complex Test', + '$.response.data[0].nested["special-key"].value', + 'less_than', + '3.14159' + ); + + expect(evaluation.label).to.equal('Complex Test'); + expect(evaluation.name).to.equal('numeric_comparison'); + expect(evaluation.parameters[1].value).to.equal('$.response.data[0].nested["special-key"].value'); + expect(evaluation.parameters[2].value).to.equal('3.14159'); + }); + }); });