From f78785077083790de06d3bd4df9389de19d00acf Mon Sep 17 00:00:00 2001
From: Willie Ruemmele <willieruemmele@gmail.com>
Date: Wed, 23 Jul 2025 15:13:34 -0600
Subject: [PATCH 1/3] chore: prompting for custom eval path, expected, operator

---
 src/commands/agent/generate/test-spec.ts | 74 ++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
diff --git a/src/commands/agent/generate/test-spec.ts b/src/commands/agent/generate/test-spec.ts
index c843f75c..2d0ea147 100644
--- a/src/commands/agent/generate/test-spec.ts
+++ b/src/commands/agent/generate/test-spec.ts
@@ -25,6 +25,11 @@ type TestCase = {
   expectedActions: string[];
   expectedTopic: string;
   expectedOutcome: string;
+  customEvaluation?: {
+    jsonPath: string;
+    expectedValue: string;
+    operator: string;
+  };
 };
 
 function castArray<T>(value: T | T[]): T[] {
@@ -42,6 +47,7 @@ function castArray<T>(value: T | T[]): T[] {
  * - expectedTopic: The expected topic for classification
  * - expectedActions: Array of expected action names
  * - expectedOutcome: Expected outcome string
+ * - customEvaluation: Optional custom evaluation JSONpath
  *
  * @remarks
  * This function guides users through creating a test case by:
@@ -49,6 +55,7 @@ function castArray<T>(value: T | T[]): T[] {
  * 2. Selecting an expected topic (from GenAiPlugins specified in the Bot's GenAiPlannerBundle)
  * 3. Choosing expected actions (from GenAiFunctions in the GenAiPlannerBundle or GenAiPlugin)
  * 4. Defining an expected outcome
+ * 5. Optionally adding a custom evaluation JSONPath
  */
 async function promptForTestCase(genAiPlugins: Record<string, string>, genAiFunctions: string[]): Promise<TestCase> {
   const utterance = await input({
@@ -108,11 +115,78 @@ async function promptForTestCase(genAiPlugins: Record<string, string>, genAiFunc
     theme,
   });
 
+  const wantsCustomEvaluation = await confirm({
+    message: 'Do you want to add a custom evaluation',
+    default: false,
+    theme,
+  });
+
+  let customEvaluation: { jsonPath: string; expectedValue: string; operator: string } | undefined;
+  if (wantsCustomEvaluation) {
+    /*
+        <expectation>
+            <label>expected recipient match</label> 
+                <name>string_comparison</name>         
+                <parameter>
+                    <name>operator</name>
+                    <value>equals</value>
+                    <isReference>false</isReference>
+                </parameter>
+                <parameter>
+                    <name>actual</name>
+                    <value>$.generatedData.invokedActions[*][?(@.function.name == 'DraftGenericReplyEmail')].function.input.recipient</value>
+                    <isReference>true</isReference>
+                </parameter>
+                <parameter>
+                    <name>expected</name>
+                    <value>Jon</value>
+                    <isReference>false</isReference>
+                </parameter>
+        </expectation>
+    */
+    const jsonPath = await input({
+      message: 'Custom evaluation JSONPath',
+      validate: (d: string): boolean | string => {
+        if (!d.length) {
+          return 'JSONPath cannot be empty';
+        }
+        return true;
+      },
+      theme,
+    });
+
+    const expectedValue = await input({
+      message: 'Expected value',
+      validate: (d: string): boolean | string => {
+        if (!d.length) {
+          return 'Expected value cannot be empty';
+        }
+        return true;
+      },
+      theme,
+    });
+
+    const operator = await select<string>({
+      message: 'Comparison operator',
+      choices: [
+        { name: 'equals (Checks for numerical equality)', value: 'equals' },
+        { name: 'greater_than_or_equal (Checks if actual >= expected)', value: 'greater_than_or_equal' },
+        { name: 'greater_than (Checks if actual > expected)', value: 'greater_than' },
+        { name: 'less_than (Checks if actual < expected)', value: 'less_than' },
+        { name: 'less_than_or_equal (Checks if actual <= expected)', value: 'less_than_or_equal' },
+      ],
+      theme,
+    });
+
+    customEvaluation = { jsonPath, expectedValue, operator };
+  }
+
   return {
     utterance,
     expectedTopic,
     expectedActions,
     expectedOutcome,
+    customEvaluation,
   };
 }
 

From 564dcc8aa419d9e931e14940495feaf75964c911 Mon Sep 17 00:00:00 2001
From: Willie Ruemmele <willieruemmele@gmail.com>
Date: Fri, 25 Jul 2025 10:07:51 -0600
Subject: [PATCH 2/3] chore: move custom eval to method

---
 src/commands/agent/generate/test-spec.ts | 123 ++++++++++++++---------
 1 file changed, 74 insertions(+), 49 deletions(-)

diff --git a/src/commands/agent/generate/test-spec.ts b/src/commands/agent/generate/test-spec.ts
index 2d0ea147..9caaa275 100644
--- a/src/commands/agent/generate/test-spec.ts
+++ b/src/commands/agent/generate/test-spec.ts
@@ -25,11 +25,15 @@ type TestCase = {
   expectedActions: string[];
   expectedTopic: string;
   expectedOutcome: string;
-  customEvaluation?: {
-    jsonPath: string;
-    expectedValue: string;
-    operator: string;
-  };
+  customEvaluations?: Array<{
+    label: string;
+    name: string;
+    parameters: Array<
+      | { name: 'operator'; value: string; isReference: false }
+      | { name: 'actual'; value: string; isReference: true }
+      | { name: 'expected'; value: string; isReference: boolean }
+    >;
+  }>;
 };
 
 function castArray<T>(value: T | T[]): T[] {
@@ -47,7 +51,7 @@ function castArray<T>(value: T | T[]): T[] {
  * - expectedTopic: The expected topic for classification
  * - expectedActions: Array of expected action names
  * - expectedOutcome: Expected outcome string
- * - customEvaluation: Optional custom evaluation JSONpath
+ * - customEvaluations: Optional custom evaluations (label, comparison name, operator/actual/expected parameters)
  *
  * @remarks
  * This function guides users through creating a test case by:
@@ -55,7 +59,7 @@ function castArray<T>(value: T | T[]): T[] {
  * 2. Selecting an expected topic (from GenAiPlugins specified in the Bot's GenAiPlannerBundle)
  * 3. Choosing expected actions (from GenAiFunctions in the GenAiPlannerBundle or GenAiPlugin)
  * 4. Defining an expected outcome
- * 5. Optionally adding a custom evaluation JSONPath
+ * 5. Optionally adding one or more custom evaluations (label, JSONPath, operator, expected value)
  */
 async function promptForTestCase(genAiPlugins: Record<string, string>, genAiFunctions: string[]): Promise<TestCase> {
   const utterance = await input({
@@ -115,46 +119,68 @@ async function promptForTestCase(genAiPlugins: Record<string, string>, genAiFunc
     theme,
   });
 
-  const wantsCustomEvaluation = await confirm({
+  const customEvaluations = await promptForCustomEvaluations();
+
+  return {
+    utterance,
+    expectedTopic,
+    expectedActions,
+    expectedOutcome,
+    customEvaluations,
+  };
+}
+
+async function promptForCustomEvaluations(): Promise<NonNullable<TestCase['customEvaluations']>> {
+  const customEvaluations: NonNullable<TestCase['customEvaluations']> = [];
+  let wantsCustomEvaluation = await confirm({
     message: 'Do you want to add a custom evaluation',
     default: false,
     theme,
   });
 
-  let customEvaluation: { jsonPath: string; expectedValue: string; operator: string } | undefined;
-  if (wantsCustomEvaluation) {
-    /*
-        <expectation>
-            <label>expected recipient match</label> 
-                <name>string_comparison</name>         
-                <parameter>
-                    <name>operator</name>
-                    <value>equals</value>
-                    <isReference>false</isReference>
-                </parameter>
-                <parameter>
-                    <name>actual</name>
-                    <value>$.generatedData.invokedActions[*][?(@.function.name == 'DraftGenericReplyEmail')].function.input.recipient</value>
-                    <isReference>true</isReference>
-                </parameter>
-                <parameter>
-                    <name>expected</name>
-                    <value>Jon</value>
-                    <isReference>false</isReference>
-                </parameter>
-        </expectation>
-    */
+  // we can have multiple custom evaluations, prompt until the user is done
+  while (wantsCustomEvaluation) {
+    // eslint-disable-next-line no-await-in-loop
+    const label = await input({
+      message: 'Custom evaluation label (descriptive name)',
+      validate: (d: string): boolean | string => {
+        if (!d.length) {
+          return 'Label cannot be empty';
+        }
+        return true;
+      },
+      theme,
+    });
+
+    // eslint-disable-next-line no-await-in-loop
     const jsonPath = await input({
-      message: 'Custom evaluation JSONPath',
+      message: 'Custom evaluation JSONPath (starts with $)',
       validate: (d: string): boolean | string => {
         if (!d.length) {
           return 'JSONPath cannot be empty';
         }
+        if (!d.startsWith('$')) {
+          return 'JSONPath must start with $';
+        }
         return true;
       },
       theme,
     });
 
+    // eslint-disable-next-line no-await-in-loop
+    const operator = await select<string>({
+      message: 'Comparison operator',
+      choices: [
+        { name: 'Equals ', value: 'equals' },
+        { name: 'Greater than or equals (>=)', value: 'greater_than_or_equal' },
+        { name: 'Greater than (>)', value: 'greater_than' },
+        { name: 'Less than (<)', value: 'less_than' },
+        { name: 'Less than or equals (<=)', value: 'less_than_or_equal' },
+      ],
+      theme,
+    });
+
+    // eslint-disable-next-line no-await-in-loop
     const expectedValue = await input({
       message: 'Expected value',
       validate: (d: string): boolean | string => {
@@ -166,28 +192,27 @@ async function promptForTestCase(genAiPlugins: Record<string, string>, genAiFunc
       theme,
     });
 
-    const operator = await select<string>({
-      message: 'Comparison operator',
-      choices: [
-        { name: 'equals (Checks for numerical equality)', value: 'equals' },
-        { name: 'greater_than_or_equal (Checks if actual >= expected)', value: 'greater_than_or_equal' },
-        { name: 'greater_than (Checks if actual > expected)', value: 'greater_than' },
-        { name: 'less_than (Checks if actual < expected)', value: 'less_than' },
-        { name: 'less_than_or_equal (Checks if actual <= expected)', value: 'less_than_or_equal' },
+    customEvaluations.push({
+      label,
+      // Determine if the expected value is numeric or string comparison
+      name:
+        !isNaN(Number(expectedValue)) && !isNaN(parseFloat(expectedValue)) ? 'numeric_comparison' : 'string_comparison',
+      parameters: [
+        { name: 'operator', value: operator, isReference: false },
+        { name: 'actual', value: jsonPath, isReference: true },
+        { name: 'expected', value: expectedValue, isReference: false },
       ],
-      theme,
     });
 
-    customEvaluation = { jsonPath, expectedValue, operator };
+    // eslint-disable-next-line no-await-in-loop
+    wantsCustomEvaluation = await confirm({
+      message: 'Do you want to add another custom evaluation',
+      default: false,
+      theme,
+    });
   }
 
-  return {
-    utterance,
-    expectedTopic,
-    expectedActions,
-    expectedOutcome,
-    customEvaluation,
-  };
+  return customEvaluations;
 }
 
 function getMetadataFilePaths(cs: ComponentSet, type: string): Record<string, string> {

From 3fa8517b277e5832802bb5e9308ca80c2831010c Mon Sep 17 00:00:00 2001
From: Willie Ruemmele <willieruemmele@gmail.com>
Date: Fri, 25 Jul 2025 14:42:25 -0600
Subject: [PATCH 3/3] refactor: move to testable methods, add UTs

---
 src/commands/agent/generate/test-spec.ts      | 41 +++++++---
 .../commands/agent/generate/test-spec.test.ts | 77 +++++++++++++++++++
 2 files changed, 106 insertions(+), 12 deletions(-)

diff --git a/src/commands/agent/generate/test-spec.ts b/src/commands/agent/generate/test-spec.ts
index b802bf76..2c5adb62 100644
--- a/src/commands/agent/generate/test-spec.ts
+++ b/src/commands/agent/generate/test-spec.ts
@@ -126,7 +126,34 @@ async function promptForTestCase(genAiPlugins: Record<string, string>, genAiFunc
   };
 }
 
-async function promptForCustomEvaluations(): Promise<NonNullable<TestCase['customEvaluations']>> {
+/**
+ * Builds one custom evaluation; `name` is `numeric_comparison` only when `expectedValue` is a decimal literal.
+ *
+ * @param label - Descriptive label for the evaluation
+ * @param jsonPath - JSONPath for the actual value (emitted as a reference)
+ * @param operator - Comparison operator (emitted as a literal)
+ * @param expectedValue - Expected value (emitted unchanged as a literal); `Infinity`, hex and text are strings
+ * @returns Custom evaluation object in the expected format
+ */
+export function createCustomEvaluation(
+  label: string,
+  jsonPath: string,
+  operator: string,
+  expectedValue: string
+): NonNullable<TestCase['customEvaluations']>[0] {
+  const isDecimal = /^[+-]?(\d+\.?\d*|\.\d+)(e[+-]?\d+)?$/i.test(expectedValue.trim());
+  return {
+    label,
+    name: isDecimal ? 'numeric_comparison' : 'string_comparison',
+    parameters: [
+      { name: 'operator', value: operator, isReference: false },
+      { name: 'actual', value: jsonPath, isReference: true },
+      { name: 'expected', value: expectedValue, isReference: false },
+    ],
+  };
+}
+
+export async function promptForCustomEvaluations(): Promise<NonNullable<TestCase['customEvaluations']>> {
   const customEvaluations: NonNullable<TestCase['customEvaluations']> = [];
   let wantsCustomEvaluation = await confirm({
     message: 'Do you want to add a custom evaluation',
@@ -188,17 +215,7 @@ async function promptForCustomEvaluations(): Promise<NonNullable<TestCase['custo
       theme,
     });
 
-    customEvaluations.push({
-      label,
-      // Determine if the expected value is numeric or string comparison
-      name:
-        !isNaN(Number(expectedValue)) && !isNaN(parseFloat(expectedValue)) ? 'numeric_comparison' : 'string_comparison',
-      parameters: [
-        { name: 'operator', value: operator, isReference: false },
-        { name: 'actual', value: jsonPath, isReference: true },
-        { name: 'expected', value: expectedValue, isReference: false },
-      ],
-    });
+    customEvaluations.push(createCustomEvaluation(label, jsonPath, operator, expectedValue));
 
     // eslint-disable-next-line no-await-in-loop
     wantsCustomEvaluation = await confirm({
diff --git a/test/commands/agent/generate/test-spec.test.ts b/test/commands/agent/generate/test-spec.test.ts
index 395b0a96..3b850dbe 100644
--- a/test/commands/agent/generate/test-spec.test.ts
+++ b/test/commands/agent/generate/test-spec.test.ts
@@ -16,6 +16,7 @@ import {
   ensureYamlExtension,
   getMetadataFilePaths,
   getPluginsAndFunctions,
+  createCustomEvaluation,
 } from '../../../../src/commands/agent/generate/test-spec.js';
 
 describe('AgentGenerateTestSpec Helper Methods', () => {
@@ -378,4 +379,80 @@ describe('AgentGenerateTestSpec Helper Methods', () => {
       expect(result).to.not.have.property('*');
     });
   });
+
+  describe('createCustomEvaluation', () => {
+    it('should create correct structure for string comparison', () => {
+      const evaluation = createCustomEvaluation('Test Label', '$.response.message', 'equals', 'expected text');
+      // Non-numeric expected text must yield string_comparison with operator/actual/expected in that order
+      expect(evaluation).to.deep.equal({
+        label: 'Test Label',
+        name: 'string_comparison',
+        parameters: [
+          { name: 'operator', value: 'equals', isReference: false },
+          { name: 'actual', value: '$.response.message', isReference: true },
+          { name: 'expected', value: 'expected text', isReference: false },
+        ],
+      });
+    });
+
+    it('should create correct structure for numeric comparison', () => {
+      const evaluation = createCustomEvaluation('Numeric Test', '$.metrics.score', 'greater_than_or_equal', '85');
+      // A numeric expected value switches the name to numeric_comparison; the value itself stays the string '85'
+      expect(evaluation).to.deep.equal({
+        label: 'Numeric Test',
+        name: 'numeric_comparison',
+        parameters: [
+          { name: 'operator', value: 'greater_than_or_equal', isReference: false },
+          { name: 'actual', value: '$.metrics.score', isReference: true },
+          { name: 'expected', value: '85', isReference: false },
+        ],
+      });
+    });
+
+    it('should handle all supported operators', () => {
+      const operators = ['equals', 'greater_than_or_equal', 'greater_than', 'less_than', 'less_than_or_equal'];
+      // Every operator must be passed through untouched as the first parameter
+      operators.forEach((operator) => {
+        const evaluation = createCustomEvaluation(`Test ${operator}`, '$.test.value', operator, '100');
+
+        expect(evaluation.parameters[0]).to.deep.equal({
+          name: 'operator',
+          value: operator,
+          isReference: false,
+        });
+      });
+    });
+
+    it('should always set correct isReference flags', () => {
+      const evaluation = createCustomEvaluation('Reference Test', '$.actual.path', 'equals', 'expected');
+      // Parameters are positional: operator, actual, expected
+      const [operatorParam, actualParam, expectedParam] = evaluation.parameters;
+
+      expect(operatorParam.isReference).to.be.false;
+      expect(actualParam.isReference).to.be.true; // actual is always a reference (JSONPath)
+      expect(expectedParam.isReference).to.be.false; // expected is always a literal value
+    });
+
+    it('should correctly determine comparison type based on expected value', () => {
+      const numericEvaluation = createCustomEvaluation('Test', '$.path', 'equals', '42');
+      expect(numericEvaluation.name).to.equal('numeric_comparison');
+      // Same label, path and operator: only the expected value decides the comparison type
+      const stringEvaluation = createCustomEvaluation('Test', '$.path', 'equals', 'text');
+      expect(stringEvaluation.name).to.equal('string_comparison');
+    });
+
+    it('should handle complex JSONPaths and values', () => {
+      const evaluation = createCustomEvaluation(
+        'Complex Test',
+        '$.response.data[0].nested["special-key"].value',
+        'less_than',
+        '3.14159'
+      );
+      // A decimal value counts as numeric; the JSONPath and expected value must be emitted verbatim
+      expect(evaluation.label).to.equal('Complex Test');
+      expect(evaluation.name).to.equal('numeric_comparison');
+      expect(evaluation.parameters[1].value).to.equal('$.response.data[0].nested["special-key"].value');
+      expect(evaluation.parameters[2].value).to.equal('3.14159');
+    });
+  });
 });