Skip to content

Commit

Permalink
feat: include evalId in results (#758)
Browse the repository at this point in the history
Thanks @pelikhan for this change and @mldangelo for fixing the tests!

---------

Co-authored-by: Ian Webster <ianw_github@ianww.com>
Co-authored-by: Michael D'Angelo <michael.l.dangelo@gmail.com>
  • Loading branch information
3 people committed Jun 2, 2024
1 parent 58215da commit 758f841
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 29 deletions.
1 change: 1 addition & 0 deletions site/docs/integrations/ci-cd.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ The schema of the `output.json` file is defined [here](https://github.com/prompt

```typescript
interface OutputFile {
evalId: string | null;
results: EvaluateSummary;
config: Partial<UnifiedConfig>;
shareableUrl: string | null;
Expand Down
15 changes: 8 additions & 7 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,19 +102,20 @@ async function evaluate(testSuite: EvaluateTestSuite, options: EvaluateOptions =
...options,
});

let evalId: string | null = null
if (testSuite.writeLatestResults) {
await migrateResultsFromFileSystemToDatabase();
evalId = await writeResultsToDatabase(ret, testSuite);
}

if (testSuite.outputPath) {
if (typeof testSuite.outputPath === 'string') {
await writeOutput(testSuite.outputPath, ret, testSuite, null);
await writeOutput(testSuite.outputPath, evalId, ret, testSuite, null);
} else if (Array.isArray(testSuite.outputPath)) {
await writeMultipleOutputs(testSuite.outputPath, ret, testSuite, null);
await writeMultipleOutputs(testSuite.outputPath, evalId, ret, testSuite, null);
}
}

if (testSuite.writeLatestResults) {
await migrateResultsFromFileSystemToDatabase();
await writeResultsToDatabase(ret, testSuite);
}

await telemetry.send();
return ret;
}
Expand Down
11 changes: 7 additions & 4 deletions src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -789,13 +789,18 @@ async function main() {
);
}

let evalId: string | null = null
if (cmdObj.write) {
evalId = await writeResultsToDatabase(summary, config);
}

const { outputPath } = config;
if (outputPath) {
// Write output to file
if (typeof outputPath === 'string') {
await writeOutput(outputPath, summary, config, shareableUrl);
await writeOutput(outputPath, evalId, summary, config, shareableUrl);
} else if (Array.isArray(outputPath)) {
await writeMultipleOutputs(outputPath, summary, config, shareableUrl);
await writeMultipleOutputs(outputPath, evalId, summary, config, shareableUrl);
}
logger.info(chalk.yellow(`Writing output to ${outputPath}`));
}
Expand All @@ -808,8 +813,6 @@ async function main() {
if (!cmdObj.write) {
logger.info(`${chalk.green('✔')} Evaluation complete`);
} else {
await writeResultsToDatabase(summary, config);

if (shareableUrl) {
logger.info(`${chalk.green('✔')} Evaluation complete: ${shareableUrl}`);
} else {
Expand Down
25 changes: 13 additions & 12 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -539,13 +539,13 @@ export interface TestCase<Vars = Record<string, string | string[] | object>> {

// Additional configuration settings for the prompt
options?: PromptConfig &
OutputConfig &
GradingConfig & {
// If true, do not expand arrays of variables into multiple eval cases.
disableVarExpansion?: boolean;
// If true, do not include an implicit `_conversation` variable in the prompt.
disableConversationVar?: boolean;
};
OutputConfig &
GradingConfig & {
// If true, do not expand arrays of variables into multiple eval cases.
disableVarExpansion?: boolean;
// If true, do not include an implicit `_conversation` variable in the prompt.
disableConversationVar?: boolean;
};

// The required score for this test case. If not provided, the test case is graded pass/fail.
threshold?: number;
Expand Down Expand Up @@ -643,11 +643,11 @@ export interface TestSuiteConfig {

// Determines whether or not sharing is enabled.
sharing?:
| boolean
| {
apiBaseUrl?: string;
appBaseUrl?: string;
};
| boolean
| {
apiBaseUrl?: string;
appBaseUrl?: string;
};

// Nunjucks filters
nunjucksFilters?: Record<string, FilePath>;
Expand Down Expand Up @@ -693,6 +693,7 @@ export interface ResultsFile {

// File exported as --output option
export interface OutputFile {
evalId: string | null;
results: EvaluateSummary;
config: Partial<UnifiedConfig>;
shareableUrl: string | null;
Expand Down
6 changes: 4 additions & 2 deletions src/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -357,17 +357,19 @@ export async function readConfigs(configPaths: string[]): Promise<UnifiedConfig>

export async function writeMultipleOutputs(
outputPaths: string[],
evalId: string | null,
results: EvaluateSummary,
config: Partial<UnifiedConfig>,
shareableUrl: string | null,
) {
await Promise.all(
outputPaths.map((outputPath) => writeOutput(outputPath, results, config, shareableUrl)),
outputPaths.map((outputPath) => writeOutput(outputPath, evalId, results, config, shareableUrl)),
);
}

export async function writeOutput(
outputPath: string,
evalId: string | null,
results: EvaluateSummary,
config: Partial<UnifiedConfig>,
shareableUrl: string | null,
Expand Down Expand Up @@ -424,7 +426,7 @@ ${gradingResultText}`.trim();
} else if (outputExtension === 'json') {
fs.writeFileSync(
outputPath,
JSON.stringify({ results, config, shareableUrl } satisfies OutputFile, null, 2),
JSON.stringify({ evalId, results, config, shareableUrl } satisfies OutputFile, null, 2),
);
} else if (
outputExtension === 'yaml' ||
Expand Down
8 changes: 4 additions & 4 deletions test/util.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ describe('util', () => {
description: 'test',
};
const shareableUrl = null;
writeOutput(outputPath, summary, config, shareableUrl);
writeOutput(outputPath, null, summary, config, shareableUrl);

expect(fs.writeFileSync).toHaveBeenCalledTimes(1);
});
Expand Down Expand Up @@ -194,7 +194,7 @@ describe('util', () => {
description: 'test',
};
const shareableUrl = null;
writeOutput(outputPath, summary, config, shareableUrl);
writeOutput(outputPath, null, summary, config, shareableUrl);

expect(fs.writeFileSync).toHaveBeenCalledTimes(1);
});
Expand Down Expand Up @@ -266,7 +266,7 @@ describe('util', () => {
description: 'test',
};
const shareableUrl = null;
writeOutput(outputPath, summary, config, shareableUrl);
writeOutput(outputPath, null, summary, config, shareableUrl);

expect(fs.writeFileSync).toHaveBeenCalledTimes(1);
});
Expand Down Expand Up @@ -338,7 +338,7 @@ describe('util', () => {
description: 'test',
};
const shareableUrl = null;
writeMultipleOutputs(outputPath, summary, config, shareableUrl);
writeMultipleOutputs(outputPath, null, summary, config, shareableUrl);

expect(fs.writeFileSync).toHaveBeenCalledTimes(2);
});
Expand Down

0 comments on commit 758f841

Please sign in to comment.