37 changes: 31 additions & 6 deletions packages/sdk/server-ai/__tests__/Judge.test.ts
@@ -39,6 +39,7 @@ describe('Judge', () => {

// Create a basic judge config
judgeConfig = {
key: 'test-judge',
enabled: true,
messages: [
{ role: 'system', content: 'You are a helpful judge that evaluates AI responses.' },
@@ -106,11 +107,21 @@ describe('Judge', () => {

expect(result).toEqual({
evals: {
relevance: { score: 0.8, reasoning: 'The response is relevant to the question' },
accuracy: { score: 0.9, reasoning: 'The response is factually accurate' },
helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' },
relevance: {
score: 0.8,
reasoning: 'The response is relevant to the question',
},
accuracy: {
score: 0.9,
reasoning: 'The response is factually accurate',
},
helpfulness: {
score: 0.7,
reasoning: 'The response provides helpful information',
},
},
success: true,
judgeConfigKey: 'test-judge',
});

expect(mockProvider.invokeStructuredModel).toHaveBeenCalledWith(
@@ -258,6 +269,7 @@ describe('Judge', () => {
helpfulness: { score: 0.7, reasoning: 'Helpful' },
},
success: false,
judgeConfigKey: 'test-judge',
});
});

@@ -289,6 +301,7 @@ describe('Judge', () => {
expect(result).toEqual({
evals: {},
success: false,
judgeConfigKey: 'test-judge',
});
});

@@ -302,6 +315,7 @@
evals: {},
success: false,
error: 'Provider error',
judgeConfigKey: 'test-judge',
});
expect(mockLogger.error).toHaveBeenCalledWith('Judge evaluation failed:', error);
});
@@ -315,6 +329,7 @@
evals: {},
success: false,
error: 'Unknown error',
judgeConfigKey: 'test-judge',
});
});
});
@@ -364,11 +379,21 @@ describe('Judge', () => {

expect(result).toEqual({
evals: {
relevance: { score: 0.8, reasoning: 'The response is relevant to the question' },
accuracy: { score: 0.9, reasoning: 'The response is factually accurate' },
helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' },
relevance: {
score: 0.8,
reasoning: 'The response is relevant to the question',
},
accuracy: {
score: 0.9,
reasoning: 'The response is factually accurate',
},
helpfulness: {
score: 0.7,
reasoning: 'The response provides helpful information',
},
},
success: true,
judgeConfigKey: 'test-judge',
});

expect(mockProvider.invokeStructuredModel).toHaveBeenCalledWith(
5 changes: 5 additions & 0 deletions packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts
@@ -372,6 +372,7 @@ describe('agents method', () => {

expect(result).toEqual({
'research-agent': {
key: 'research-agent',
model: {
name: 'research-model',
parameters: { temperature: 0.3, maxTokens: 2048 },
@@ -382,6 +383,7 @@
enabled: true,
},
'writing-agent': {
key: 'writing-agent',
model: {
name: 'writing-model',
parameters: { temperature: 0.7, maxTokens: 1024 },
@@ -482,6 +484,7 @@ describe('createJudge method', () => {
};

const mockJudgeConfig = {
key: 'test-judge',
enabled: true,
model: { name: 'gpt-4' },
provider: { name: 'openai' },
@@ -525,6 +528,7 @@ describe('createJudge method', () => {
};

const mockJudgeConfig = {
key: 'test-judge',
enabled: false,
evaluationMetricKeys: [],
};
@@ -548,6 +552,7 @@
};

const mockJudgeConfig = {
key: 'test-judge',
enabled: true,
model: { name: 'gpt-4' },
provider: { name: 'openai' },
1 change: 1 addition & 0 deletions packages/sdk/server-ai/__tests__/TrackedChat.test.ts
@@ -33,6 +33,7 @@ describe('TrackedChat', () => {

// Create a basic AI config
aiConfig = {
key: 'test-config',
enabled: true,
messages: [{ role: 'system', content: 'You are a helpful assistant.' }],
model: { name: 'gpt-4' },
4 changes: 2 additions & 2 deletions packages/sdk/server-ai/src/LDAIClientImpl.ts
@@ -63,7 +63,7 @@ export class LDAIClientImpl implements LDAIClient {
this._logger?.warn(
`AI Config mode mismatch for ${key}: expected ${mode}, got ${flagMode}. Returning disabled config.`,
);
return LDAIConfigUtils.createDisabledConfig(mode);
return LDAIConfigUtils.createDisabledConfig(key, mode);
}

const tracker = new LDAIConfigTrackerImpl(
Expand All @@ -78,7 +78,7 @@ export class LDAIClientImpl implements LDAIClient {
context,
);

const config = LDAIConfigUtils.fromFlagValue(value, tracker);
const config = LDAIConfigUtils.fromFlagValue(key, value, tracker);

// Apply variable interpolation (always needed for ldctx)
return this._applyInterpolation(config, context, variables);
14 changes: 12 additions & 2 deletions packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts
@@ -3,7 +3,7 @@ import { LDContext } from '@launchdarkly/js-server-sdk-common';
import { name as aiSdkName, version as aiSdkVersion } from '../package.json';
import { LDAIConfigTracker } from './api/config';
import { LDAIMetricSummary } from './api/config/LDAIConfigTracker';
import { EvalScore } from './api/judge/types';
import { EvalScore, JudgeResponse } from './api/judge/types';
import {
createBedrockTokenUsage,
createOpenAiUsage,
@@ -76,12 +76,22 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker {
}

trackEvalScores(scores: Record<string, EvalScore>) {
// Track each evaluation score individually
Object.entries(scores).forEach(([metricKey, evalScore]) => {
this._ldClient.track(metricKey, this._context, this.getTrackData(), evalScore.score);
});
}

trackJudgeResponse(response: JudgeResponse) {
Object.entries(response.evals).forEach(([metricKey, evalScore]) => {
this._ldClient.track(
metricKey,
this._context,
{ ...this.getTrackData(), judgeConfigKey: response.judgeConfigKey },
evalScore.score,
);
});
}

trackFeedback(feedback: { kind: LDFeedbackKind }): void {
this._trackedMetrics.feedback = feedback;
if (feedback.kind === LDFeedbackKind.Positive) {
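Below is a minimal usage sketch of the new tracker method. It is illustrative only: the `EvalScore`/`JudgeResponse` interfaces are re-declared locally to mirror the types in this PR, and the tracker instance and metric names are stand-ins rather than real SDK setup.

```ts
interface EvalScore {
  score: number;
  reasoning: string;
}

interface JudgeResponse {
  judgeConfigKey?: string;
  evals: Record<string, EvalScore>;
  success: boolean;
  error?: string;
}

// Stand-in for an LDAIConfigTracker obtained from an AI Config.
declare const tracker: { trackJudgeResponse(response: JudgeResponse): void };

const response: JudgeResponse = {
  judgeConfigKey: 'my-judge-config', // illustrative key
  success: true,
  evals: {
    relevance: { score: 0.8, reasoning: 'On topic' },
    accuracy: { score: 0.9, reasoning: 'Factually correct' },
  },
};

// Emits one track event per metric ('relevance', 'accuracy'); each event's data
// now carries judgeConfigKey alongside the usual tracking data.
tracker.trackJudgeResponse(response);
```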
14 changes: 9 additions & 5 deletions packages/sdk/server-ai/src/api/chat/TrackedChat.ts
@@ -84,13 +84,17 @@ export class TrackedChat {
return undefined;
}

const evalResult = await judge.evaluateMessages(messages, response, judgeConfig.samplingRate);

if (evalResult && evalResult.success) {
this.tracker.trackEvalScores(evalResult.evals);
const judgeResponse = await judge.evaluateMessages(
messages,
response,
judgeConfig.samplingRate,
);

if (judgeResponse && judgeResponse.success) {
this.tracker.trackJudgeResponse(judgeResponse);
}

return evalResult;
return judgeResponse;
});

// ensure all evaluations complete even if some fail
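Since the surrounding `evaluate` callback is only partially visible in this diff, here is a condensed, hypothetical sketch of the updated flow: the judge response is now tracked as a whole (instead of passing only its `evals` to `trackEvalScores`), which is what carries the judge config key through to the tracked events. The `JudgeLike`/`TrackerLike` types are simplified stand-ins, not SDK interfaces.

```ts
interface EvalScore {
  score: number;
  reasoning: string;
}

interface JudgeResponse {
  judgeConfigKey?: string;
  evals: Record<string, EvalScore>;
  success: boolean;
  error?: string;
}

// Simplified stand-ins for the judge and tracker used inside TrackedChat.
type JudgeLike = {
  evaluateMessages(
    messages: unknown[],
    response: string,
    samplingRate?: number,
  ): Promise<JudgeResponse | undefined>;
};
type TrackerLike = { trackJudgeResponse(response: JudgeResponse): void };

async function evaluateWithJudge(
  judge: JudgeLike,
  tracker: TrackerLike,
  messages: unknown[],
  response: string,
  samplingRate?: number,
): Promise<JudgeResponse | undefined> {
  const judgeResponse = await judge.evaluateMessages(messages, response, samplingRate);

  // Previously: tracker.trackEvalScores(judgeResponse.evals)
  if (judgeResponse && judgeResponse.success) {
    tracker.trackJudgeResponse(judgeResponse);
  }

  return judgeResponse;
}
```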
@@ -1,4 +1,4 @@
import { EvalScore } from '../judge/types';
import { EvalScore, JudgeResponse } from '../judge/types';
import { LDAIMetrics, LDFeedbackKind, LDTokenUsage } from '../metrics';

/**
@@ -94,6 +94,13 @@ export interface LDAIConfigTracker {
*/
trackEvalScores(scores: Record<string, EvalScore>): void;

/**
* Track a judge response containing evaluation scores and judge configuration key.
*
* @param response Judge response containing evaluation scores and judge configuration key
*/
trackJudgeResponse(response: JudgeResponse): void;

/**
* Track the duration of execution of the provided function.
*
29 changes: 20 additions & 9 deletions packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts
@@ -82,19 +82,23 @@
* @param tracker The tracker to add to the config
* @returns The appropriate AI configuration type
*/
static fromFlagValue(flagValue: LDAIConfigFlagValue, tracker: LDAIConfigTracker): LDAIConfigKind {
static fromFlagValue(
key: string,
flagValue: LDAIConfigFlagValue,
tracker: LDAIConfigTracker,
): LDAIConfigKind {
// Determine the actual mode from flag value
// eslint-disable-next-line no-underscore-dangle
const flagValueMode = flagValue._ldMeta?.mode;

switch (flagValueMode) {
case 'agent':
return this.toAgentConfig(flagValue, tracker);
return this.toAgentConfig(key, flagValue, tracker);
case 'judge':
return this.toJudgeConfig(flagValue, tracker);
return this.toJudgeConfig(key, flagValue, tracker);
case 'completion':
default:
return this.toCompletionConfig(flagValue, tracker);
return this.toCompletionConfig(key, flagValue, tracker);
}
}

@@ -104,15 +108,17 @@
* @param mode The mode for the disabled config
* @returns A disabled config of the appropriate type
*/
static createDisabledConfig(mode: LDAIConfigMode): LDAIConfigKind {
static createDisabledConfig(key: string, mode: LDAIConfigMode): LDAIConfigKind {
switch (mode) {
case 'agent':
return {
key,
enabled: false,
tracker: undefined,
} as LDAIAgentConfig;
case 'judge':
return {
key,
enabled: false,
tracker: undefined,
evaluationMetricKeys: [],
@@ -121,6 +127,7 @@
default:
// Default to completion config for completion mode or any unexpected mode
return {
key,
enabled: false,
tracker: undefined,
} as LDAICompletionConfig;
@@ -133,8 +140,9 @@
* @param flagValue The flag value from LaunchDarkly
* @returns Base configuration object
*/
private static _toBaseConfig(flagValue: LDAIConfigFlagValue) {
private static _toBaseConfig(key: string, flagValue: LDAIConfigFlagValue) {
return {
key,
// eslint-disable-next-line no-underscore-dangle
enabled: flagValue._ldMeta?.enabled ?? false,
model: flagValue.model,
@@ -150,11 +158,12 @@
* @returns A completion configuration
*/
static toCompletionConfig(
key: string,
flagValue: LDAIConfigFlagValue,
tracker: LDAIConfigTracker,
): LDAICompletionConfig {
return {
...this._toBaseConfig(flagValue),
...this._toBaseConfig(key, flagValue),
tracker,
messages: flagValue.messages,
judgeConfiguration: flagValue.judgeConfiguration,
@@ -169,11 +178,12 @@
* @returns An agent configuration
*/
static toAgentConfig(
key: string,
flagValue: LDAIConfigFlagValue,
tracker: LDAIConfigTracker,
): LDAIAgentConfig {
return {
...this._toBaseConfig(flagValue),
...this._toBaseConfig(key, flagValue),
tracker,
instructions: flagValue.instructions,
judgeConfiguration: flagValue.judgeConfiguration,
@@ -188,11 +198,12 @@
* @returns A judge configuration
*/
static toJudgeConfig(
key: string,
flagValue: LDAIConfigFlagValue,
tracker: LDAIConfigTracker,
): LDAIJudgeConfig {
return {
...this._toBaseConfig(flagValue),
...this._toBaseConfig(key, flagValue),
tracker,
messages: flagValue.messages,
evaluationMetricKeys: flagValue.evaluationMetricKeys || [],
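To make the key-threading change easier to follow, here is a trimmed-down sketch of the pattern using simplified placeholder types (not the SDK's real `LDAIConfigFlagValue`/`LDAIConfigTracker` interfaces): the flag key is now passed into every converter and surfaces as `key` on the resulting config.

```ts
type Mode = 'completion' | 'agent' | 'judge';

interface FlagValueLike {
  _ldMeta?: { enabled?: boolean; mode?: Mode };
  model?: { name: string };
}

// Mirrors _toBaseConfig: the key is stamped onto every config that gets built.
function toBaseConfig(key: string, flagValue: FlagValueLike) {
  return {
    key,
    enabled: flagValue._ldMeta?.enabled ?? false,
    model: flagValue.model,
  };
}

// toBaseConfig('my-ai-config', { _ldMeta: { enabled: true, mode: 'judge' } })
// -> { key: 'my-ai-config', enabled: true, model: undefined }
```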
4 changes: 4 additions & 0 deletions packages/sdk/server-ai/src/api/config/types.ts
@@ -95,6 +95,10 @@ export interface LDAIConfigDefault {
* Base AI Config interface without mode-specific fields.
*/
export interface LDAIConfig extends Omit<LDAIConfigDefault, 'enabled'> {
/**
* The key of the AI Config.
*/
key: string;
/**
* Whether the configuration is enabled.
*/
2 changes: 2 additions & 0 deletions packages/sdk/server-ai/src/api/judge/Judge.ts
@@ -91,13 +91,15 @@ export class Judge {
return {
evals,
success,
judgeConfigKey: this._aiConfig.key,
};
} catch (error) {
this._logger?.error('Judge evaluation failed:', error);
return {
evals: {},
success: false,
error: error instanceof Error ? error.message : 'Unknown error',
judgeConfigKey: this._aiConfig.key,
};
}
}
2 changes: 2 additions & 0 deletions packages/sdk/server-ai/src/api/judge/types.ts
@@ -30,6 +30,8 @@ export interface EvalScore {
* Response from a judge evaluation containing scores and reasoning for multiple metrics.
*/
export interface JudgeResponse {
/** The key of the judge configuration that was used to generate this response */
judgeConfigKey?: string;
/** Dictionary where keys are metric names and values contain score and reasoning */
evals: Record<string, EvalScore>;
/** Whether the evaluation completed successfully */