packages/sdk/server-ai/src/ldai/judge/__init__.py (6 changes: 3 additions & 3 deletions)
@@ -51,7 +51,7 @@ async def evaluate(
         :param input_text: The input prompt or question that was provided to the AI
         :param output_text: The AI-generated response to be evaluated
         :param sampling_rate: Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1)
-        :return: Evaluation result; ``sampled=True`` when skipped due to sampling rate
+        :return: The result of the judge evaluation.
         """
         judge_result = JudgeResult(judge_config_key=self._ai_config.key)

@@ -70,9 +70,9 @@

         if random.random() > sampling_rate:
             log.debug(f'Judge evaluation skipped due to sampling rate: {sampling_rate}')
-            judge_result.sampled = True
             return judge_result

+        judge_result.sampled = True
         messages = self._construct_evaluation_messages(input_text, output_text)
         assert self._evaluation_response_structure is not None

@@ -110,7 +110,7 @@ async def evaluate_messages(
         :param messages: Array of messages representing the conversation history
         :param response: The AI response to be evaluated
         :param sampling_ratio: Sampling ratio (0-1) to determine if evaluation should be processed (defaults to 1)
-        :return: Evaluation result; ``sampled=True`` when skipped due to sampling rate
+        :return: The result of the judge evaluation.
         """
         input_text = '\r\n'.join([msg.content for msg in messages]) if messages else ''
         output_text = response.message.content
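Net effect of these hunks: sampled now records that an evaluation actually ran, rather than that it was skipped. A minimal sketch of the new control flow in evaluate, using only names shown in the diff (the omitted tail constructs the messages and invokes the structured model):

    judge_result = JudgeResult(judge_config_key=self._ai_config.key)

    # Skipped evaluations return with sampled=False (the dataclass default),
    # so callers can distinguish "never ran" from "ran and failed".
    if random.random() > sampling_rate:
        return judge_result

    # Only evaluations that actually execute are marked as sampled.
    judge_result.sampled = True
    messages = self._construct_evaluation_messages(input_text, output_text)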
packages/sdk/server-ai/src/ldai/providers/types.py (4 changes: 2 additions & 2 deletions)
@@ -65,10 +65,10 @@ class JudgeResult:
     judge_config_key: Optional[str] = None
     success: bool = False
     error_message: Optional[str] = None
-    sampled: bool = False  # True when the judge was skipped due to sampling rate
+    sampled: bool = False  # True when the evaluation was sampled and run
+    metric_key: Optional[str] = None
     score: Optional[float] = None
     reasoning: Optional[str] = None
-    metric_key: Optional[str] = None

     def to_dict(self) -> Dict[str, Any]:
         """
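With the flag repurposed, a caller would branch on sampled before reading the score. A hypothetical usage sketch (judge, prompt, and answer are assumed names, not part of this diff):

    result = await judge.evaluate(prompt, answer, sampling_rate=0.25)

    if not result.sampled:
        pass  # skipped by the sampling gate; score and reasoning are unset
    elif result.success:
        print(result.metric_key, result.score, result.reasoning)
    else:
        print(f'Judge failed: {result.error_message}')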
packages/sdk/server-ai/src/ldai/tracker.py (3 changes: 3 additions & 0 deletions)
@@ -253,6 +253,9 @@ def track_judge_result(self, judge_result: Any, *, graph_key: Optional[str] = None
         :param judge_result: JudgeResult object containing score, metric key, and success status
         :param graph_key: When set, include ``graphKey`` in the event payload.
         """
+        if not judge_result.sampled:
+            return
+
         if judge_result.success and judge_result.metric_key:
             track_data = self.__get_track_data(graph_key=graph_key)
             if judge_result.judge_config_key:
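The early return makes tracking a no-op for results the sampler skipped. A small sketch of the effect, assuming an LDAIConfigTracker instance named tracker:

    skipped = JudgeResult(judge_config_key='relevance-judge')  # sampled=False by default
    tracker.track_judge_result(skipped)  # returns immediately; no event is emitted

    ran = JudgeResult(judge_config_key='relevance-judge', sampled=True, success=True,
                      metric_key='$ld:ai:judge:relevance', score=0.85)
    tracker.track_judge_result(ran)  # builds track data and emits the metric event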
packages/sdk/server-ai/tests/test_judge.py (6 changes: 4 additions & 2 deletions)
@@ -168,6 +168,7 @@ async def test_evaluate_success_with_valid_response(

         assert isinstance(result, JudgeResult)
         assert result.success is True
+        assert result.sampled is True
         assert result.metric_key == '$ld:ai:judge:relevance'
         assert result.score == 0.85
         assert result.reasoning is not None
@@ -194,6 +195,7 @@ async def test_evaluate_success_with_evaluation_response_shape(

         assert isinstance(result, JudgeResult)
         assert result.success is True
+        assert result.sampled is True
         assert result.metric_key == '$ld:ai:judge:relevance'
         assert result.score == 0.9
         assert result.reasoning is not None
@@ -288,13 +290,13 @@ async def test_evaluate_handles_exception(
     async def test_evaluate_respects_sampling_rate(
         self, judge_config_with_key: AIJudgeConfig, tracker: LDAIConfigTracker, mock_runner
     ):
-        """Evaluate should return sampled=True when skipped due to sampling rate."""
+        """Evaluate should return sampled=False when skipped due to sampling rate."""
         judge = Judge(judge_config_with_key, tracker, mock_runner)

         result = await judge.evaluate("input", "output", sampling_rate=0.0)

         assert isinstance(result, JudgeResult)
-        assert result.sampled is True
+        assert result.sampled is False
         assert result.success is False
         mock_runner.invoke_structured_model.assert_not_called()

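A companion test could pin the RNG to exercise the skip path at a non-zero rate. This is a hypothetical sketch, not part of the PR, and it assumes the judge module calls random.random() at module scope as the diff suggests:

    async def test_evaluate_skips_when_rng_exceeds_rate(
        self, judge_config_with_key: AIJudgeConfig, tracker: LDAIConfigTracker,
        mock_runner, monkeypatch
    ):
        """sampled should stay False whenever the model is never invoked."""
        judge = Judge(judge_config_with_key, tracker, mock_runner)

        # Pin the gate so it skips deterministically: 0.9 > 0.5.
        monkeypatch.setattr('random.random', lambda: 0.9)
        result = await judge.evaluate('input', 'output', sampling_rate=0.5)

        assert result.sampled is False
        mock_runner.invoke_structured_model.assert_not_called()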