Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pass evalId in results #758

Merged
merged 6 commits into from
Jun 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions site/docs/integrations/ci-cd.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ The schema of the `output.json` file is defined [here](https://github.com/prompt

```typescript
interface OutputFile {
evalId: string | null;
results: EvaluateSummary;
config: Partial<UnifiedConfig>;
shareableUrl: string | null;
Expand Down
15 changes: 8 additions & 7 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,19 +102,20 @@ async function evaluate(testSuite: EvaluateTestSuite, options: EvaluateOptions =
...options,
});

let evalId: string | null = null
if (testSuite.writeLatestResults) {
await migrateResultsFromFileSystemToDatabase();
evalId = await writeResultsToDatabase(ret, testSuite);
}

if (testSuite.outputPath) {
if (typeof testSuite.outputPath === 'string') {
await writeOutput(testSuite.outputPath, ret, testSuite, null);
await writeOutput(testSuite.outputPath, evalId, ret, testSuite, null);
} else if (Array.isArray(testSuite.outputPath)) {
await writeMultipleOutputs(testSuite.outputPath, ret, testSuite, null);
await writeMultipleOutputs(testSuite.outputPath, evalId, ret, testSuite, null);
}
}

if (testSuite.writeLatestResults) {
await migrateResultsFromFileSystemToDatabase();
await writeResultsToDatabase(ret, testSuite);
}

await telemetry.send();
return ret;
}
Expand Down
11 changes: 7 additions & 4 deletions src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -789,13 +789,18 @@ async function main() {
);
}

let evalId: string | null = null
if (cmdObj.write) {
evalId = await writeResultsToDatabase(summary, config);
}

const { outputPath } = config;
if (outputPath) {
// Write output to file
if (typeof outputPath === 'string') {
await writeOutput(outputPath, summary, config, shareableUrl);
await writeOutput(outputPath, evalId, summary, config, shareableUrl);
} else if (Array.isArray(outputPath)) {
await writeMultipleOutputs(outputPath, summary, config, shareableUrl);
await writeMultipleOutputs(outputPath, evalId, summary, config, shareableUrl);
}
logger.info(chalk.yellow(`Writing output to ${outputPath}`));
}
Expand All @@ -808,8 +813,6 @@ async function main() {
if (!cmdObj.write) {
logger.info(`${chalk.green('✔')} Evaluation complete`);
} else {
await writeResultsToDatabase(summary, config);

if (shareableUrl) {
logger.info(`${chalk.green('✔')} Evaluation complete: ${shareableUrl}`);
} else {
Expand Down
25 changes: 13 additions & 12 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -539,13 +539,13 @@ export interface TestCase<Vars = Record<string, string | string[] | object>> {

// Additional configuration settings for the prompt
options?: PromptConfig &
OutputConfig &
GradingConfig & {
// If true, do not expand arrays of variables into multiple eval cases.
disableVarExpansion?: boolean;
// If true, do not include an implicit `_conversation` variable in the prompt.
disableConversationVar?: boolean;
};
OutputConfig &
GradingConfig & {
// If true, do not expand arrays of variables into multiple eval cases.
disableVarExpansion?: boolean;
// If true, do not include an implicit `_conversation` variable in the prompt.
disableConversationVar?: boolean;
};

// The required score for this test case. If not provided, the test case is graded pass/fail.
threshold?: number;
Expand Down Expand Up @@ -643,11 +643,11 @@ export interface TestSuiteConfig {

// Determines whether or not sharing is enabled.
sharing?:
| boolean
| {
apiBaseUrl?: string;
appBaseUrl?: string;
};
| boolean
| {
apiBaseUrl?: string;
appBaseUrl?: string;
};

// Nunjucks filters
nunjucksFilters?: Record<string, FilePath>;
Expand Down Expand Up @@ -693,6 +693,7 @@ export interface ResultsFile {

// File exported as --output option
export interface OutputFile {
evalId: string | null;
results: EvaluateSummary;
config: Partial<UnifiedConfig>;
shareableUrl: string | null;
Expand Down
6 changes: 4 additions & 2 deletions src/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -357,17 +357,19 @@ export async function readConfigs(configPaths: string[]): Promise<UnifiedConfig>

export async function writeMultipleOutputs(
outputPaths: string[],
evalId: string | null,
results: EvaluateSummary,
config: Partial<UnifiedConfig>,
shareableUrl: string | null,
) {
await Promise.all(
outputPaths.map((outputPath) => writeOutput(outputPath, results, config, shareableUrl)),
outputPaths.map((outputPath) => writeOutput(outputPath, evalId, results, config, shareableUrl)),
);
}

export async function writeOutput(
outputPath: string,
evalId: string | null,
results: EvaluateSummary,
config: Partial<UnifiedConfig>,
shareableUrl: string | null,
Expand Down Expand Up @@ -424,7 +426,7 @@ ${gradingResultText}`.trim();
} else if (outputExtension === 'json') {
fs.writeFileSync(
outputPath,
JSON.stringify({ results, config, shareableUrl } satisfies OutputFile, null, 2),
JSON.stringify({ evalId, results, config, shareableUrl } satisfies OutputFile, null, 2),
);
} else if (
outputExtension === 'yaml' ||
Expand Down
8 changes: 4 additions & 4 deletions test/util.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ describe('util', () => {
description: 'test',
};
const shareableUrl = null;
writeOutput(outputPath, summary, config, shareableUrl);
writeOutput(outputPath, null, summary, config, shareableUrl);

expect(fs.writeFileSync).toHaveBeenCalledTimes(1);
});
Expand Down Expand Up @@ -194,7 +194,7 @@ describe('util', () => {
description: 'test',
};
const shareableUrl = null;
writeOutput(outputPath, summary, config, shareableUrl);
writeOutput(outputPath, null, summary, config, shareableUrl);

expect(fs.writeFileSync).toHaveBeenCalledTimes(1);
});
Expand Down Expand Up @@ -266,7 +266,7 @@ describe('util', () => {
description: 'test',
};
const shareableUrl = null;
writeOutput(outputPath, summary, config, shareableUrl);
writeOutput(outputPath, null, summary, config, shareableUrl);

expect(fs.writeFileSync).toHaveBeenCalledTimes(1);
});
Expand Down Expand Up @@ -338,7 +338,7 @@ describe('util', () => {
description: 'test',
};
const shareableUrl = null;
writeMultipleOutputs(outputPath, summary, config, shareableUrl);
writeMultipleOutputs(outputPath, null, summary, config, shareableUrl);

expect(fs.writeFileSync).toHaveBeenCalledTimes(2);
});
Expand Down