Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion frontend/src/components/History/AttackTable.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,22 @@ import {
CheckmarkCircleRegular,
DismissCircleRegular,
QuestionCircleRegular,
ErrorCircleRegular,
} from '@fluentui/react-icons'
import type { AttackSummary } from '../../types'
import { useAttackHistoryStyles } from './AttackHistory.styles'

const OUTCOME_ICONS: Record<string, React.ReactElement> = {
success: <CheckmarkCircleRegular style={{ color: tokens.colorPaletteGreenForeground1 }} />,
failure: <DismissCircleRegular style={{ color: tokens.colorPaletteRedForeground1 }} />,
error: <ErrorCircleRegular style={{ color: tokens.colorPaletteRedForeground1 }} />,
undetermined: <QuestionCircleRegular style={{ color: tokens.colorNeutralForeground3 }} />,
}

const OUTCOME_COLORS: Record<string, 'success' | 'danger' | 'informative'> = {
const OUTCOME_COLORS: Record<string, 'success' | 'danger' | 'informative' | 'warning'> = {
success: 'success',
failure: 'danger',
error: 'warning',
undetermined: 'informative',
}

Expand Down
2 changes: 2 additions & 0 deletions frontend/src/components/History/HistoryFiltersBar.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ const NO_CONVERTERS_SENTINEL = '__no_converters__'
const OUTCOME_LABELS: Record<string, string> = {
success: 'Success',
failure: 'Failure',
error: 'Error',
undetermined: 'Undetermined',
}

Expand Down Expand Up @@ -200,6 +201,7 @@ export default function HistoryFiltersBar({
<Option value="">All outcomes</Option>
<Option value="success">Success</Option>
<Option value="failure">Failure</Option>
<Option value="error">Error</Option>
<Option value="undetermined">Undetermined</Option>
</Combobox>
<Combobox
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/types/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ export interface AttackSummary {
attack_specific_params?: Record<string, unknown> | null
target?: TargetInfo | null
converters: string[]
outcome?: 'undetermined' | 'success' | 'failure' | null
outcome?: 'undetermined' | 'success' | 'failure' | 'error' | null
last_message_preview?: string | null
message_count: number
related_conversation_ids: string[]
Expand Down
9 changes: 8 additions & 1 deletion pyrit/analytics/result_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@ class AttackStats:
successes: int
failures: int
undetermined: int
errors: int


def _compute_stats(successes: int, failures: int, undetermined: int) -> AttackStats:
def _compute_stats(successes: int, failures: int, undetermined: int, errors: int) -> AttackStats:
total_decided = successes + failures
success_rate = successes / total_decided if total_decided > 0 else None
return AttackStats(
Expand All @@ -28,6 +29,7 @@ def _compute_stats(successes: int, failures: int, undetermined: int) -> AttackSt
successes=successes,
failures=failures,
undetermined=undetermined,
errors=errors,
)


Expand Down Expand Up @@ -71,6 +73,9 @@ def analyze_results(attack_results: list[AttackResult]) -> dict[str, AttackStats
elif outcome == AttackOutcome.FAILURE:
overall_counts["failures"] += 1
by_type_counts[attack_type]["failures"] += 1
elif outcome == AttackOutcome.ERROR:
overall_counts["errors"] += 1
by_type_counts[attack_type]["errors"] += 1
else:
overall_counts["undetermined"] += 1
by_type_counts[attack_type]["undetermined"] += 1
Expand All @@ -79,13 +84,15 @@ def analyze_results(attack_results: list[AttackResult]) -> dict[str, AttackStats
successes=overall_counts["successes"],
failures=overall_counts["failures"],
undetermined=overall_counts["undetermined"],
errors=overall_counts["errors"],
)

by_type_stats = {
attack_type: _compute_stats(
successes=counts["successes"],
failures=counts["failures"],
undetermined=counts["undetermined"],
errors=counts["errors"],
)
for attack_type, counts in by_type_counts.items()
}
Expand Down
38 changes: 38 additions & 0 deletions pyrit/backend/mappers/attack_mappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
Message,
MessagePiece,
MessagePieceRequest,
RetryEventResponse,
Score,
TargetInfo,
)
Expand All @@ -43,6 +44,7 @@

if TYPE_CHECKING:
from pyrit.models.conversation_stats import ConversationStats
from pyrit.models.retry_event import RetryEvent

# ============================================================================
# Domain → DTO (for API responses)
Expand Down Expand Up @@ -181,6 +183,34 @@ def _resolve_media_url(*, value: Optional[str], data_type: str) -> Optional[str]
return value


def retry_events_to_response(retry_events: list[RetryEvent] | None) -> list[RetryEventResponse] | None:
"""
Convert a list of RetryEvent domain objects to RetryEventResponse DTOs.

Args:
retry_events: Domain retry events, or None.

Returns:
List of RetryEventResponse DTOs, or None if the input is None or empty.
"""
if not retry_events:
return None
return [
RetryEventResponse(
timestamp=evt.timestamp,
attempt_number=evt.attempt_number,
function_name=evt.function_name,
exception_type=evt.exception_type,
exception_message=evt.exception_message,
component_role=evt.component_role,
component_name=evt.component_name,
endpoint=evt.endpoint,
elapsed_seconds=evt.elapsed_seconds,
)
for evt in retry_events
]


def attack_result_to_summary(
ar: AttackResult,
*,
Expand Down Expand Up @@ -232,6 +262,9 @@ def attack_result_to_summary(
else None
)

# Build retry event responses if available
retry_event_responses = retry_events_to_response(ar.retry_events)

return AttackSummary(
attack_result_id=ar.attack_result_id,
conversation_id=ar.conversation_id,
Expand All @@ -246,6 +279,11 @@ def attack_result_to_summary(
labels=labels,
created_at=created_at,
updated_at=updated_at,
error_message=ar.error_message,
error_type=ar.error_type,
error_traceback=ar.error_traceback,
total_retries=ar.total_retries,
retry_events=retry_event_responses,
)


Expand Down
29 changes: 27 additions & 2 deletions pyrit/backend/models/attacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,20 @@ class TargetInfo(BaseModel):
model_name: Optional[str] = Field(None, description="Model or deployment name")


class RetryEventResponse(BaseModel):
"""A single retry attempt captured during execution."""

timestamp: datetime = Field(..., description="When the retry occurred")
attempt_number: int = Field(..., ge=1, description="Tenacity attempt number (1-based)")
function_name: str = Field(..., description="The retried function name")
exception_type: str = Field("", description="Exception class name")
exception_message: str = Field("", description="Exception message")
component_role: str = Field("", description="Component role from ExecutionContext")
component_name: str | None = Field(None, description="Component class name")
endpoint: str | None = Field(None, description="Target endpoint URL")
elapsed_seconds: float = Field(0.0, ge=0, description="Time since first attempt in seconds")


class AttackSummary(BaseModel):
"""Summary view of an attack (for list views, omits full message content)."""

Expand All @@ -102,7 +116,7 @@ class AttackSummary(BaseModel):
default_factory=list, description="Request converter class names applied in this attack"
)
objective: str = Field("", description="Natural-language description of the attacker's objective")
outcome: Optional[Literal["undetermined", "success", "failure"]] = Field(
outcome: Optional[Literal["undetermined", "success", "failure", "error"]] = Field(
None, description="Attack outcome (null if not yet determined)"
)
outcome_reason: str | None = Field(None, description="Reason for the outcome")
Expand All @@ -121,6 +135,17 @@ class AttackSummary(BaseModel):
created_at: datetime = Field(..., description="Attack creation timestamp")
updated_at: datetime = Field(..., description="Last update timestamp")

# Error information
error_message: str | None = Field(None, description="Error message if the attack failed with an exception")
error_type: str | None = Field(None, description="Exception class name (e.g., 'RateLimitError')")
error_traceback: str | None = Field(None, description="Formatted traceback string")

# Retry information
total_retries: int = Field(0, ge=0, description="Total number of retries during this attack")
retry_events: list[RetryEventResponse] | None = Field(
None, description="Detailed retry events (omitted in list views unless requested)"
)


# ============================================================================
# Conversation Messages Response
Expand Down Expand Up @@ -236,7 +261,7 @@ class CreateAttackResponse(BaseModel):
class UpdateAttackRequest(BaseModel):
"""Request to update an attack's outcome."""

outcome: Literal["undetermined", "success", "failure"] = Field(..., description="Updated attack outcome")
outcome: Literal["undetermined", "success", "failure", "error"] = Field(..., description="Updated attack outcome")


# ============================================================================
Expand Down
3 changes: 3 additions & 0 deletions pyrit/backend/models/scenarios.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ class ScenarioRunSummary(BaseModel):
created_at: datetime = Field(..., description="When the run was created")
updated_at: datetime = Field(..., description="When the run status last changed")
error: str | None = Field(None, description="Error message if status is FAILED")
error_type: str | None = Field(None, description="Exception class name if status is FAILED")
strategies_used: list[str] = Field(default_factory=list, description="Strategy names that were executed")
total_attacks: int = Field(0, ge=0, description="Total number of atomic attacks")
completed_attacks: int = Field(0, ge=0, description="Number of attacks that completed")
Expand Down Expand Up @@ -126,6 +127,8 @@ class AtomicAttackResults(BaseModel):
success_count: int = Field(0, ge=0, description="Number of successful attacks")
failure_count: int = Field(0, ge=0, description="Number of failed attacks")
total_count: int = Field(0, ge=0, description="Total number of attack results")
total_retries: int = Field(0, ge=0, description="Sum of retries across all attacks in this group")
error_count: int = Field(0, ge=0, description="Number of attacks with errors")


class ScenarioRunDetail(BaseModel):
Expand Down
4 changes: 3 additions & 1 deletion pyrit/backend/routes/attacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,9 @@ async def list_attacks(
description="Filter by converter presence. true = attacks with at least one converter; "
"false = attacks with no converters. Omit for no filter.",
),
outcome: Optional[Literal["undetermined", "success", "failure"]] = Query(None, description="Filter by outcome"),
outcome: Optional[Literal["undetermined", "success", "failure", "error"]] = Query(
None, description="Filter by outcome"
),
label: Optional[list[str]] = Query(
None,
description="Filter by labels (format: key:value). May be specified multiple times; "
Expand Down
3 changes: 2 additions & 1 deletion pyrit/backend/services/attack_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ async def list_attacks_async(
converter_types: Optional[Sequence[str]] = None,
converter_types_match: Literal["any", "all"] = "all",
has_converters: Optional[bool] = None,
outcome: Optional[Literal["undetermined", "success", "failure"]] = None,
outcome: Optional[Literal["undetermined", "success", "failure", "error"]] = None,
labels: Optional[dict[str, str | Sequence[str]]] = None,
min_turns: Optional[int] = None,
max_turns: Optional[int] = None,
Expand Down Expand Up @@ -370,6 +370,7 @@ async def update_attack_async(
"undetermined": AttackOutcome.UNDETERMINED,
"success": AttackOutcome.SUCCESS,
"failure": AttackOutcome.FAILURE,
"error": AttackOutcome.ERROR,
}
new_outcome = outcome_map.get(request.outcome, AttackOutcome.UNDETERMINED)

Expand Down
54 changes: 48 additions & 6 deletions pyrit/backend/services/scenario_run_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from datetime import datetime, timezone
from typing import TYPE_CHECKING, Any

from pyrit.backend.mappers.attack_mappers import retry_events_to_response
from pyrit.backend.models.scenarios import (
AtomicAttackResults,
AttackSummary,
Expand Down Expand Up @@ -183,7 +184,12 @@ async def cancel_run_async(self, *, scenario_result_id: str) -> ScenarioRunSumma
await asyncio.wait_for(active.task, timeout=5.0)

# Persist cancelled state to DB
self._memory.update_scenario_run_state(scenario_result_id=scenario_result_id, scenario_run_state="CANCELLED")
self._memory.update_scenario_run_state(
scenario_result_id=scenario_result_id,
scenario_run_state="CANCELLED",
error_message="Run was cancelled by user",
error_type="CancelledError",
)

return self._build_response(scenario_result_id=scenario_result_id)

Expand Down Expand Up @@ -395,12 +401,24 @@ def _build_response_from_db(self, *, scenario_result: ScenarioResult) -> Scenari
scenario_result_id = str(scenario_result.id)
active = self._active_tasks.get(scenario_result_id)

# Clean up finished active tasks after reading the error
error = None
if active is not None:
# Clean up finished active tasks
if active is not None and active.task is not None and active.task.done():
del self._active_tasks[scenario_result_id]

# Primary source: DB-persisted error fields
error = scenario_result.error_message
error_type = scenario_result.error_type

# Fallback: look up error from persisted error AttackResults
if not error and scenario_result.error_attack_result_ids:
error_ars = self._memory.get_attack_results(attack_result_ids=scenario_result.error_attack_result_ids)
if error_ars:
error = error_ars[0].error_message
error_type = error_ars[0].error_type

# Fallback: in-memory error for in-flight tasks where DB hasn't been updated yet
if not error and active is not None:
error = active.error
if active.task is not None and active.task.done():
del self._active_tasks[scenario_result_id]

status = ScenarioRunStatus(scenario_result.scenario_run_state)

Expand All @@ -426,6 +444,7 @@ def _build_response_from_db(self, *, scenario_result: ScenarioResult) -> Scenari
created_at=scenario_result.creation_time,
updated_at=scenario_result.completion_time,
error=error,
error_type=error_type,
strategies_used=strategies_used,
total_attacks=total_attacks,
completed_attacks=completed_attacks,
Expand Down Expand Up @@ -467,6 +486,8 @@ def get_run_results(self, *, scenario_result_id: str) -> ScenarioRunDetail | Non
details: list[AttackSummary] = []
success_count = 0
failure_count = 0
group_total_retries = 0
group_error_count = 0

for ar in attack_results:
score_value = None
Expand All @@ -478,6 +499,16 @@ def get_run_results(self, *, scenario_result_id: str) -> ScenarioRunDetail | Non
last_response_text = str(ar.last_response)

timestamp = ar.timestamp or datetime.now(timezone.utc)

# Build retry event responses using the shared mapper
retry_event_responses = retry_events_to_response(ar.retry_events)

# Extract error/retry fields
ar_error_message = ar.error_message
ar_error_type = ar.error_type
ar_error_traceback = ar.error_traceback
ar_total_retries = ar.total_retries

details.append(
AttackSummary(
attack_result_id=ar.attack_result_id,
Expand All @@ -491,6 +522,11 @@ def get_run_results(self, *, scenario_result_id: str) -> ScenarioRunDetail | Non
execution_time_ms=ar.execution_time_ms,
created_at=timestamp,
updated_at=timestamp,
error_message=ar_error_message,
error_type=ar_error_type,
error_traceback=ar_error_traceback,
total_retries=ar_total_retries,
retry_events=retry_event_responses,
)
)

Expand All @@ -499,6 +535,10 @@ def get_run_results(self, *, scenario_result_id: str) -> ScenarioRunDetail | Non
elif ar.outcome == AttackOutcome.FAILURE:
failure_count += 1

group_total_retries += ar_total_retries
if ar_error_message:
group_error_count += 1

attacks.append(
AtomicAttackResults(
atomic_attack_name=attack_name,
Expand All @@ -507,6 +547,8 @@ def get_run_results(self, *, scenario_result_id: str) -> ScenarioRunDetail | Non
success_count=success_count,
failure_count=failure_count,
total_count=len(details),
total_retries=group_total_retries,
error_count=group_error_count,
)
)

Expand Down
Loading
Loading