# Patch summary (derived from the hunks below):
# - loop.ts: promptfoo eval is now launched through the new runPromptfooEval helper
#   (execFileSync with an argv array) instead of a shell command string built for execSync.
# - config.ts: generateConfig additionally returns verifyPath and writes a copy of the YAML
#   under the default name promptfooconfig.yaml when a different filename was requested.
# - Three new vitest files cover the helper and the alias behaviour.
diff --git a/plugins/promptfoo/src/agent/loop-eval.test.ts b/plugins/promptfoo/src/agent/loop-eval.test.ts
new file mode 100644
index 0000000..930a8f2
--- /dev/null
+++ b/plugins/promptfoo/src/agent/loop-eval.test.ts
@@ -0,0 +1,32 @@
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+
+const execFileSyncMock = vi.fn(); // stand-in for execFileSync; its calls are inspected below
+
+vi.mock('node:child_process', () => ({ // substitute the module so the test never spawns a real process
+  execFileSync: execFileSyncMock,
+  execSync: vi.fn(), // loop.ts still imports execSync, so the mocked module must export it too
+}));
+
+describe('runPromptfooEval', () => {
+  beforeEach(() => {
+    execFileSyncMock.mockReset(); // drop recorded calls and return values between tests
+  });
+
+  it('invokes promptfoo eval with explicit argv', async () => {
+    execFileSyncMock.mockReturnValue('1 passed');
+
+    const { runPromptfooEval } = await import('./loop.js'); // loaded inside the test, after execFileSyncMock exists
+
+    expect(runPromptfooEval('/tmp/job dir', 'promptfooconfig.yaml')).toBe('1 passed'); // helper returns the mocked output unchanged
+    expect(execFileSyncMock).toHaveBeenCalledWith(
+      'npx',
+      ['promptfoo', 'eval', '-c', 'promptfooconfig.yaml', '--no-progress-bar'], // config path is its own argv element
+      expect.objectContaining({ // only these option keys are pinned; extra keys would still match
+        cwd: '/tmp/job dir', // directory name with a space must arrive as cwd unmodified
+        timeout: 120000,
+        encoding: 'utf-8',
+        stdio: ['ignore', 'pipe', 'pipe'],
+      })
+    );
+  });
+});
diff --git a/plugins/promptfoo/src/agent/loop.ts b/plugins/promptfoo/src/agent/loop.ts
index 63a9c1e..60a43a7 100644
--- a/plugins/promptfoo/src/agent/loop.ts
+++ b/plugins/promptfoo/src/agent/loop.ts
@@ -14,7 +14,7 @@ import type { LLMProvider, Message, ToolCall, ChatResponse } from './providers.j
 import type { DiscoveryResult } from '../types.js';
 import * as fs from 'node:fs';
 import * as path from 'node:path';
-import { execSync } from 'node:child_process';
+import { execFileSync, execSync } from 'node:child_process';
 import { pathToFileURL } from 'node:url';
 
 export interface AgentOptions {
@@ -41,6 +41,7 @@ export interface ToolResult {
 
 interface AgentState {
   configFile?: string;
+  verifyConfigFile?: string; // config path used by the verify step; assigned from generated.verifyPath
   providerFile?: string;
   envVars: Record;
   verified: boolean;
@@ -193,6 +194,19 @@ Steps:
   };
 }
 
+export function runPromptfooEval(outputDir: string, configPath: string): string { // runs `npx promptfoo eval` and returns what execFileSync returns
+  return execFileSync(
+    'npx',
+    ['promptfoo', 'eval', '-c', configPath, '--no-progress-bar'], // argv array: configPath is passed as one argument, not spliced into a command string
+    {
+      cwd: outputDir, // run inside the output directory (replaces the former `cd "<dir>" &&` prefix)
+      timeout: 120000, // milliseconds
+      encoding: 'utf-8', // makes execFileSync return a string rather than a Buffer
+      stdio: ['ignore', 'pipe', 'pipe'], // no stdin; stdout and stderr are piped back to this process
+    }
+  );
+}
+
 /**
  * Execute a single tool call
  */
@@ -248,6 +262,7 @@ async function executeTool(
         });
 
         state.configFile = generated.filePath;
+        state.verifyConfigFile = generated.verifyPath; // remember the alias name for the later verify step
         state.envVars = { ...state.envVars, ...generated.envVars };
 
         result = {
@@ -277,7 +292,7 @@ async function executeTool(
           configFile?: string;
         };
 
-        const configPath = configFile || state.configFile || 'promptfooconfig.yaml';
+        const configPath = configFile || state.verifyConfigFile || 'promptfooconfig.yaml'; // explicit argument first, then the recorded alias, then the default name
         const steps: string[] = [];
 
         // Step 1: Direct provider smoke + session test
@@ -329,10 +344,7 @@ async function executeTool(
 
         // Step 2: Run promptfoo eval
         try {
-          const output = execSync(
-            `cd "${outputDir}" && npx promptfoo eval -c "${configPath}" --no-progress-bar 2>&1`,
-            { timeout: 120000, encoding: 'utf-8' }
-          );
+          const output = runPromptfooEval(outputDir, configPath); // a thrown error is handled by the catch below
 
           const passMatch = output.match(/(\d+) passed/);
           const failMatch = output.match(/(\d+) failed/);
@@ -360,13 +372,15 @@ async function executeTool(
             steps,
           };
         } catch (error) {
-          const err = error as { message: string; stdout?: string; stderr?: string };
-          const stdout = err.stdout || '';
+          const err = error as { message: string; stdout?: string | Buffer; stderr?: string | Buffer }; // streams are typed as string or Buffer and normalised to string below
+          const stdout = typeof err.stdout === 'string' ? err.stdout : err.stdout?.toString('utf-8') || ''; // missing stream becomes ''
+          const stderr = typeof err.stderr === 'string' ? err.stderr : err.stderr?.toString('utf-8') || ''; // same normalisation for stderr
+          const combinedOutput = stdout + stderr; // stderr is no longer folded into stdout by `2>&1`, so join the two here
 
-          const passMatch = stdout.match(/(\d+) passed/);
+          const passMatch = combinedOutput.match(/(\d+) passed/); // look for the pass count in either stream
           const passed = passMatch ? parseInt(passMatch[1]) : 0;
 
-          if (passed > 0 && !stdout.includes('failed')) {
+          if (passed > 0 && !combinedOutput.includes('failed')) { // NOTE(review): any "failed" substring in either stream, even "0 failed", blocks this branch — confirm intended
             steps.push(`Eval PASSED (non-zero exit): ${passed} passed`);
             state.verified = true;
           } else {
@@ -377,7 +391,7 @@ async function executeTool(
           result = {
             success: state.verified,
             error: state.verified ? undefined : err.message,
-            stdout: stdout.slice(0, 1000),
+            stdout: combinedOutput.slice(0, 1000), // first 1000 characters of stdout followed by stderr
             steps,
           };
         }
diff --git a/plugins/promptfoo/src/generator/config-filename.test.ts b/plugins/promptfoo/src/generator/config-filename.test.ts
new file mode 100644
index 0000000..436322b
--- /dev/null
+++ b/plugins/promptfoo/src/generator/config-filename.test.ts
@@ -0,0 +1,36 @@
+import * as fs from 'node:fs';
+import * as os from 'node:os';
+import * as path from 'node:path';
+
+import { afterEach, describe, expect, it } from 'vitest';
+
+import { generateConfig } from './config.js';
+
+const tempDirs: string[] = []; // temp directories created by tests, removed in afterEach
+
+afterEach(() => {
+  for (const dir of tempDirs.splice(0)) { // splice(0) empties the list and yields its former contents
+    fs.rmSync(dir, { recursive: true, force: true });
+  }
+});
+
+describe('generateConfig filename handling', () => {
+  it('keeps the requested filename while writing a stable verify config alias', () => {
+    const outputDir = fs.mkdtempSync(path.join(os.tmpdir(), 'crabcode-config-')); // fresh directory per test
+    tempDirs.push(outputDir);
+
+    const generated = generateConfig({
+      description: 'Custom filename config',
+      providerType: 'http',
+      providerConfig: { url: 'https://example.com', method: 'GET' },
+      outputDir,
+      filename: 'custom-config.yaml', // differs from the default name, so the alias file is written as well
+    });
+
+    expect(generated.filePath).toBe(path.join(outputDir, 'custom-config.yaml'));
+    expect(generated.verifyPath).toBe('promptfooconfig.yaml'); // bare file name, not joined with outputDir
+    expect(fs.readFileSync(generated.filePath, 'utf-8')).toBe( // requested file and alias must hold identical YAML
+      fs.readFileSync(path.join(outputDir, generated.verifyPath), 'utf-8')
+    );
+  });
+});
diff --git a/plugins/promptfoo/src/generator/config-outputdir.test.ts b/plugins/promptfoo/src/generator/config-outputdir.test.ts
new file mode 100644
index 0000000..fa5fe05
--- /dev/null
+++ b/plugins/promptfoo/src/generator/config-outputdir.test.ts
@@ -0,0 +1,34 @@
+import * as fs from 'node:fs';
+import * as os from 'node:os';
+import * as path from 'node:path';
+
+import { afterEach, describe, expect, it } from 'vitest';
+
+import { generateConfig } from './config.js';
+
+const tempDirs: string[] = []; // temp directories created by tests, removed in afterEach
+
+afterEach(() => {
+  for (const dir of tempDirs.splice(0)) { // splice(0) empties the list and yields its former contents
+    fs.rmSync(dir, { recursive: true, force: true });
+  }
+});
+
+describe('generateConfig output paths', () => {
+  it('returns a verify path relative to the output directory', () => {
+    const outputDir = fs.mkdtempSync(path.join(os.tmpdir(), 'crabcode-config-')); // fresh directory per test
+    tempDirs.push(outputDir);
+
+    const generated = generateConfig({
+      description: 'Test config',
+      providerType: 'http',
+      providerConfig: { url: 'https://example.com', method: 'GET' },
+      outputDir,
+      filename: 'nested-config.yaml',
+    });
+
+    expect(generated.filePath).toBe(path.join(outputDir, 'nested-config.yaml')); // filePath includes outputDir
+    expect(generated.verifyPath).toBe('promptfooconfig.yaml'); // verifyPath does not include outputDir
+    expect(fs.existsSync(generated.filePath)).toBe(true);
+  });
+});
diff --git a/plugins/promptfoo/src/generator/config.ts b/plugins/promptfoo/src/generator/config.ts
index bc749e8..eb90677 100644
--- a/plugins/promptfoo/src/generator/config.ts
+++ b/plugins/promptfoo/src/generator/config.ts
@@ -21,9 +21,12 @@ export interface GenerateConfigOptions {
 export interface GeneratedConfig {
   yaml: string;
   filePath: string;
+  verifyPath: string; // file name of the alias config, relative to outputDir
   envVars: Record;
 }
 
+const DEFAULT_CONFIG_FILENAME = 'promptfooconfig.yaml'; // default output name and the fixed alias name
+
 /**
  * Generate a promptfoo YAML config
  */
@@ -34,7 +37,7 @@ export function generateConfig(options: GenerateConfigOptions): GeneratedConfig
     providerConfig,
     envVars = {},
     outputDir = '.',
-    filename = 'promptfooconfig.yaml',
+    filename = DEFAULT_CONFIG_FILENAME,
   } = options;
 
   // Validate providerConfig has required fields for http provider
@@ -100,10 +103,16 @@ ${Object.entries(envVars).map(([k, v]) => `# ${k}: ${v}`).join('\n') || '# (
   // Write the file
   const filePath = path.join(outputDir, filename);
   fs.writeFileSync(filePath, fullYaml, 'utf-8');
+  const verifyPath = DEFAULT_CONFIG_FILENAME; // alias always uses the default name
+
+  if (verifyPath !== filename) { // plain string comparison; skips the second write when the names are equal
+    fs.writeFileSync(path.join(outputDir, verifyPath), fullYaml, 'utf-8'); // NOTE(review): replaces any existing promptfooconfig.yaml in outputDir — confirm acceptable
+  }
 
   return {
     yaml: fullYaml,
     filePath,
+    verifyPath, // returned relative to outputDir, unlike filePath
     envVars,
   };
 }