Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions plugins/promptfoo/src/agent/loop-eval.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import { beforeEach, describe, expect, it, vi } from 'vitest';

// Create the mock through vi.hoisted so it is initialised before the hoisted
// vi.mock factory below can run, however './loop.js' ends up being imported.
const execFileSyncMock = vi.hoisted(() => vi.fn());

// Replace node:child_process so the module under test never spawns a real process.
vi.mock('node:child_process', () => ({
  execFileSync: execFileSyncMock,
  execSync: vi.fn(),
}));

describe('runPromptfooEval', () => {
  // Each test starts with a mock that has no recorded calls and no canned return value.
  beforeEach(() => {
    execFileSyncMock.mockReset();
  });

  it('invokes promptfoo eval with explicit argv', async () => {
    execFileSyncMock.mockReturnValue('1 passed');

    // Loaded inside the test so the child_process mock is already registered.
    const loopModule = await import('./loop.js');
    const output = loopModule.runPromptfooEval('/tmp/job dir', 'promptfooconfig.yaml');

    // The mocked stdout is handed back unchanged.
    expect(output).toBe('1 passed');

    // The command and its arguments are passed separately, with the job
    // directory (which contains a space) supplied as cwd.
    const expectedArgv = ['promptfoo', 'eval', '-c', 'promptfooconfig.yaml', '--no-progress-bar'];
    const expectedOptions = expect.objectContaining({
      cwd: '/tmp/job dir',
      timeout: 120000,
      encoding: 'utf-8',
      stdio: ['ignore', 'pipe', 'pipe'],
    });
    expect(execFileSyncMock).toHaveBeenCalledWith('npx', expectedArgv, expectedOptions);
  });
});
36 changes: 25 additions & 11 deletions plugins/promptfoo/src/agent/loop.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import type { LLMProvider, Message, ToolCall, ChatResponse } from './providers.j
import type { DiscoveryResult } from '../types.js';
import * as fs from 'node:fs';
import * as path from 'node:path';
import { execSync } from 'node:child_process';
import { execFileSync, execSync } from 'node:child_process';
import { pathToFileURL } from 'node:url';

export interface AgentOptions {
Expand All @@ -41,6 +41,7 @@ export interface ToolResult {

interface AgentState {
configFile?: string;
verifyConfigFile?: string;
providerFile?: string;
envVars: Record<string, string>;
verified: boolean;
Expand Down Expand Up @@ -193,6 +194,19 @@ Steps:
};
}

/**
 * Run `npx promptfoo eval` for a config file and return the captured stdout.
 *
 * The command is handed to `execFileSync` as a program name plus an argument
 * array, so `outputDir` and `configPath` are never interpolated into a
 * command string.
 *
 * NOTE(review): `npx` is invoked directly rather than through a shell —
 * confirm this resolves on every platform the plugin targets.
 *
 * @param outputDir - Working directory for the child process.
 * @param configPath - Value passed to the `-c` flag.
 * @returns The child's stdout decoded as UTF-8.
 * @throws Whatever `execFileSync` throws, e.g. on a non-zero exit status or
 *   when the 120000 ms timeout elapses.
 */
export function runPromptfooEval(outputDir: string, configPath: string): string {
  const evalArgs = ['promptfoo', 'eval', '-c', configPath, '--no-progress-bar'];

  const stdout = execFileSync('npx', evalArgs, {
    cwd: outputDir,
    timeout: 120000,
    encoding: 'utf-8',
    // stdin is ignored; stdout and stderr are piped rather than inherited.
    stdio: ['ignore', 'pipe', 'pipe'],
  });

  return stdout;
}

/**
* Execute a single tool call
*/
Expand Down Expand Up @@ -248,6 +262,7 @@ async function executeTool(
});

state.configFile = generated.filePath;
state.verifyConfigFile = generated.verifyPath;
state.envVars = { ...state.envVars, ...generated.envVars };

result = {
Expand Down Expand Up @@ -277,7 +292,7 @@ async function executeTool(
configFile?: string;
};

const configPath = configFile || state.configFile || 'promptfooconfig.yaml';
const configPath = configFile || state.verifyConfigFile || 'promptfooconfig.yaml';
const steps: string[] = [];

// Step 1: Direct provider smoke + session test
Expand Down Expand Up @@ -329,10 +344,7 @@ async function executeTool(

// Step 2: Run promptfoo eval
try {
const output = execSync(
`cd "${outputDir}" && npx promptfoo eval -c "${configPath}" --no-progress-bar 2>&1`,
{ timeout: 120000, encoding: 'utf-8' }
);
const output = runPromptfooEval(outputDir, configPath);

const passMatch = output.match(/(\d+) passed/);
const failMatch = output.match(/(\d+) failed/);
Expand Down Expand Up @@ -360,13 +372,15 @@ async function executeTool(
steps,
};
} catch (error) {
const err = error as { message: string; stdout?: string; stderr?: string };
const stdout = err.stdout || '';
const err = error as { message: string; stdout?: string | Buffer; stderr?: string | Buffer };
const stdout = typeof err.stdout === 'string' ? err.stdout : err.stdout?.toString('utf-8') || '';
const stderr = typeof err.stderr === 'string' ? err.stderr : err.stderr?.toString('utf-8') || '';
const combinedOutput = stdout + stderr;

const passMatch = stdout.match(/(\d+) passed/);
const passMatch = combinedOutput.match(/(\d+) passed/);
const passed = passMatch ? parseInt(passMatch[1]) : 0;

if (passed > 0 && !stdout.includes('failed')) {
if (passed > 0 && !combinedOutput.includes('failed')) {
steps.push(`Eval PASSED (non-zero exit): ${passed} passed`);
state.verified = true;
} else {
Expand All @@ -377,7 +391,7 @@ async function executeTool(
result = {
success: state.verified,
error: state.verified ? undefined : err.message,
stdout: stdout.slice(0, 1000),
stdout: combinedOutput.slice(0, 1000),
steps,
};
}
Expand Down
36 changes: 36 additions & 0 deletions plugins/promptfoo/src/generator/config-filename.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import * as fs from 'node:fs';
import * as os from 'node:os';
import * as path from 'node:path';

import { afterEach, describe, expect, it } from 'vitest';

import { generateConfig } from './config.js';

// Temporary directories registered by tests in this file.
const tempDirs: string[] = [];

// Remove every registered directory after each test.
afterEach(() => {
  // splice(0) empties the shared list in place and hands back what it held.
  const registered = tempDirs.splice(0);
  registered.forEach((dir) => {
    fs.rmSync(dir, { recursive: true, force: true });
  });
});

describe('generateConfig filename handling', () => {
  it('keeps the requested filename while writing a stable verify config alias', () => {
    // Fresh scratch directory, registered for cleanup in afterEach.
    const workDir = fs.mkdtempSync(path.join(os.tmpdir(), 'crabcode-config-'));
    tempDirs.push(workDir);

    const { filePath, verifyPath } = generateConfig({
      description: 'Custom filename config',
      providerType: 'http',
      providerConfig: { url: 'https://example.com', method: 'GET' },
      outputDir: workDir,
      filename: 'custom-config.yaml',
    });

    // The requested name is honoured and the alias uses the default name.
    expect(filePath).toBe(path.join(workDir, 'custom-config.yaml'));
    expect(verifyPath).toBe('promptfooconfig.yaml');

    // Both files on disk carry the same YAML.
    const requestedYaml = fs.readFileSync(filePath, 'utf-8');
    const aliasYaml = fs.readFileSync(path.join(workDir, verifyPath), 'utf-8');
    expect(requestedYaml).toBe(aliasYaml);
  });
});
34 changes: 34 additions & 0 deletions plugins/promptfoo/src/generator/config-outputdir.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import * as fs from 'node:fs';
import * as os from 'node:os';
import * as path from 'node:path';

import { afterEach, describe, expect, it } from 'vitest';

import { generateConfig } from './config.js';

// Scratch directories created by the tests below; cleared after every test.
const tempDirs: string[] = [];

afterEach(() => {
  // Drain the list first so the next test starts with nothing registered,
  // then delete each directory that was taken out of it.
  const toRemove = tempDirs.splice(0);
  toRemove.forEach((dir) => fs.rmSync(dir, { recursive: true, force: true }));
});

describe('generateConfig output paths', () => {
  it('returns a verify path relative to the output directory', () => {
    // Fresh scratch directory, registered for cleanup in afterEach.
    const outputDir = fs.mkdtempSync(path.join(os.tmpdir(), 'crabcode-config-'));
    tempDirs.push(outputDir);

    const generated = generateConfig({
      description: 'Test config',
      providerType: 'http',
      providerConfig: { url: 'https://example.com', method: 'GET' },
      outputDir,
      filename: 'nested-config.yaml',
    });

    expect(generated.filePath).toBe(path.join(outputDir, 'nested-config.yaml'));
    expect(generated.verifyPath).toBe('promptfooconfig.yaml');
    expect(fs.existsSync(generated.filePath)).toBe(true);

    // Back up the test title: the verify path is not absolute, and resolving
    // it against outputDir points at a file that was actually written.
    expect(path.isAbsolute(generated.verifyPath)).toBe(false);
    expect(fs.existsSync(path.join(outputDir, generated.verifyPath))).toBe(true);
  });
});
11 changes: 10 additions & 1 deletion plugins/promptfoo/src/generator/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,12 @@ export interface GenerateConfigOptions {
/**
 * Result returned by `generateConfig`.
 */
export interface GeneratedConfig {
/** Full YAML text that was written to disk. */
yaml: string;
/** Path of the written config: `outputDir` joined with the requested filename. */
filePath: string;
/** Default config filename, relative to `outputDir` (not joined with it), under which a copy of the YAML is also written. */
verifyPath: string;
/** The `envVars` option as received by `generateConfig` (defaults to `{}`). */
envVars: Record<string, string>;
}

/** Default config filename; also the name `generateConfig` returns as `verifyPath`. */
const DEFAULT_CONFIG_FILENAME = 'promptfooconfig.yaml';

/**
* Generate a promptfoo YAML config
*/
Expand All @@ -34,7 +37,7 @@ export function generateConfig(options: GenerateConfigOptions): GeneratedConfig
providerConfig,
envVars = {},
outputDir = '.',
filename = 'promptfooconfig.yaml',
filename = DEFAULT_CONFIG_FILENAME,
} = options;

// Validate providerConfig has required fields for http provider
Expand Down Expand Up @@ -100,10 +103,16 @@ ${Object.entries(envVars).map(([k, v]) => `# ${k}: ${v}`).join('\n') || '# (
// Write the file
const filePath = path.join(outputDir, filename);
fs.writeFileSync(filePath, fullYaml, 'utf-8');
const verifyPath = DEFAULT_CONFIG_FILENAME;

if (verifyPath !== filename) {
fs.writeFileSync(path.join(outputDir, verifyPath), fullYaml, 'utf-8');
}

return {
yaml: fullYaml,
filePath,
verifyPath,
envVars,
};
}
Expand Down