Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
3f92e73
Migrate ScenarioRegistry onto unified Registry base
rlundeen2 Jul 1, 2026
7121d66
style: normalize Phase 5 scenario-registry files to ruff-format
rlundeen2 Jul 1, 2026
6349a53
Add ScenarioRegistry.create_and_initialize_async; backend uses it
rlundeen2 Jul 1, 2026
8e108b2
Unify scenario param-setting into one registry-owned helper
rlundeen2 Jul 1, 2026
3a2966d
Rename declared-param resolver for naming parity with resolve_constru…
rlundeen2 Jul 1, 2026
379b50c
Move scenario param persistence/diff mechanics into the registry reso…
rlundeen2 Jul 1, 2026
baf5bdf
Merge origin/main (#2112) into phase-5-scenario-registry
rlundeen2 Jul 1, 2026
5501669
Remove max_dataset_size from scenario catalog metadata
rlundeen2 Jul 1, 2026
656f8ff
docs: fix stale ScenarioRegistry reference in normalize_scenario_name…
rlundeen2 Jul 1, 2026
14ba07e
refactor: remove unused discover_user_scenarios from ScenarioRegistry
rlundeen2 Jul 1, 2026
47f3a0d
Slim ScenarioIdentifier to a pure typed projection
rlundeen2 Jul 1, 2026
9851b3f
Flatten ScenarioResult identity, make ScenarioIdentifier registry-only
rlundeen2 Jul 1, 2026
716a59c
Unify registry discovery via recursive subclass enumeration
rlundeen2 Jul 1, 2026
24adc24
Fold ScenarioResult identity into ScenarioIdentifier
rlundeen2 Jul 2, 2026
9fc4a11
Fold pyrit_version into ScenarioIdentifier; rename _reject_undeclared…
rlundeen2 Jul 2, 2026
8617a20
Simplify scenario resume validation to eval-hash comparison
rlundeen2 Jul 2, 2026
c4e3f2d
Reserve 'version' as a scenario param name
rlundeen2 Jul 2, 2026
6d78894
Add migration to rename scenario_init_data column; apply ruff format
rlundeen2 Jul 2, 2026
9729f2d
Enforce every ComponentType is classified for reference resolution
rlundeen2 Jul 2, 2026
6435a5c
Merge origin/main into rlundeen2-phase-5-scenario-registry
rlundeen2 Jul 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 17 additions & 11 deletions pyrit/backend/services/scenario_run_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,24 +469,30 @@ def _resolve_converter_modifiers(self, *, modifiers: list[str], token: str) -> l

async def _initialize_scenario_async(self, *, request: RunScenarioRequest, init_kwargs: dict[str, Any]) -> Scenario:
"""
Instantiate the scenario and call initialize_async.
Build and initialize the scenario via the registry.

Delegates the full create + set-parameters + initialize lifecycle to
``ScenarioRegistry.create_and_initialize_async`` so the registry owns
scenario creation and initialization. The run-specific common parameters
(target, strategies, dataset config, concurrency) are resolved by
``_build_init_kwargs`` and forwarded as ``init_kwargs``.

Args:
request: The run request (for scenario_name, scenario_params, and
scenario_result_id).
init_kwargs: The kwargs to pass to scenario.initialize_async.
init_kwargs: The resolved common parameters to pass to
scenario.initialize_async.

Returns:
The fully initialized Scenario instance ready for run_async.
"""
constructor_kwargs: dict[str, Any] = {}
if request.scenario_result_id:
constructor_kwargs["scenario_result_id"] = request.scenario_result_id
scenario_registry = ScenarioRegistry.get_registry_singleton()
scenario = scenario_registry.create_instance(request.scenario_name, **constructor_kwargs)
scenario.set_params_from_args(args=request.scenario_params or {})
await scenario.initialize_async(**init_kwargs)
return scenario
return await scenario_registry.create_and_initialize_async(
request.scenario_name,
scenario_params=request.scenario_params or {},
scenario_result_id=request.scenario_result_id or None,
**init_kwargs,
)

async def _execute_run_async(self, *, scenario_result_id: str) -> None:
"""
Expand Down Expand Up @@ -579,8 +585,8 @@ def _build_response_from_db(self, *, scenario_result: ScenarioResult) -> Scenari

return ScenarioRunSummary(
scenario_result_id=scenario_result_id,
scenario_name=scenario_result.scenario_identifier.name,
scenario_version=scenario_result.scenario_identifier.version,
scenario_name=scenario_result.scenario_name,
scenario_version=scenario_result.scenario_version,
status=status,
created_at=scenario_result.creation_time,
updated_at=scenario_result.completion_time or scenario_result.creation_time,
Expand Down
17 changes: 10 additions & 7 deletions pyrit/backend/services/scenario_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ def _metadata_to_registered_scenario(metadata: ScenarioMetadata) -> RegisteredSc
aggregate_strategies=list(metadata.aggregate_strategies),
all_strategies=list(metadata.all_strategies),
default_datasets=list(metadata.default_datasets),
max_dataset_size=metadata.max_dataset_size,
supported_parameters=list(metadata.supported_parameters),
)

Expand Down Expand Up @@ -68,15 +67,20 @@ async def list_scenarios_async(
Returns:
ListRegisteredScenariosResponse with paginated scenario summaries.
"""
all_metadata = self._registry.list_metadata()
all_metadata = self._registry.get_all_registered_class_metadata()
all_summaries = [_metadata_to_registered_scenario(m) for m in all_metadata]

page, has_more = self._paginate(items=all_summaries, cursor=cursor, limit=limit)
next_cursor = page[-1].scenario_name if has_more and page else None

return ListRegisteredScenariosResponse(
items=page,
pagination=PaginationInfo(limit=limit, has_more=has_more, next_cursor=next_cursor, prev_cursor=cursor),
pagination=PaginationInfo(
limit=limit,
has_more=has_more,
next_cursor=next_cursor,
prev_cursor=cursor,
),
)

async def get_scenario_async(self, *, scenario_name: str) -> RegisteredScenario | None:
Expand All @@ -89,10 +93,9 @@ async def get_scenario_async(self, *, scenario_name: str) -> RegisteredScenario
Returns:
RegisteredScenario if found, None otherwise.
"""
all_metadata = self._registry.list_metadata()
for metadata in all_metadata:
if metadata.registry_name == scenario_name:
return _metadata_to_registered_scenario(metadata)
metadata = self._registry.get_registered_class_metadata(scenario_name)
if metadata is not None:
return _metadata_to_registered_scenario(metadata)
return None

@staticmethod
Expand Down
3 changes: 1 addition & 2 deletions pyrit/cli/_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,7 @@ def print_scenario_list(*, items: list[RegisteredScenario]) -> None:
if sc.default_strategy:
print(f" Default Strategy: {sc.default_strategy}")
if sc.default_datasets:
suffix = f", max {sc.max_dataset_size} per dataset" if sc.max_dataset_size else ""
print(f" Default Datasets ({len(sc.default_datasets)}{suffix}):")
print(f" Default Datasets ({len(sc.default_datasets)}):")
print(_wrap(text=", ".join(sc.default_datasets), indent=" "))
if sc.supported_parameters:
print(" Supported Parameters:")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""
Rename ScenarioResultEntries.scenario_init_data to scenario_identifier.

The scenario result now stores a single canonical ``ScenarioIdentifier`` in
place of the loose ``scenario_init_data`` blob. Rename the column and make it
non-nullable to match the model.

Revision ID: d4e6f8a0b2c4
Revises: c3d5e7f9a1b2
Create Date: 2026-07-02 10:35:00.000000
"""

from collections.abc import Sequence

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision: str = "d4e6f8a0b2c4"
down_revision: str | None = "c3d5e7f9a1b2"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


def upgrade() -> None:
    """
    Rename ``scenario_init_data`` to ``scenario_identifier`` and make it NOT NULL.

    The change is applied to the ``ScenarioResultEntries`` table through
    Alembic batch mode.
    """
    table_name = "ScenarioResultEntries"
    json_type = sa.JSON()

    # Batch mode is used because SQLite cannot ALTER COLUMN in place; the table
    # is recreated instead, which keeps the rename and the NOT NULL change
    # portable across SQLite and Azure SQL.
    # NOTE(review): tightening to NOT NULL presumably requires that no existing
    # row holds a SQL NULL in this column -- confirm before running this
    # against a populated database.
    with op.batch_alter_table(table_name) as batch_op:
        batch_op.alter_column(
            "scenario_init_data",
            nullable=False,
            existing_nullable=True,
            existing_type=json_type,
            new_column_name="scenario_identifier",
        )


def downgrade() -> None:
    """
    Rename ``scenario_identifier`` back to ``scenario_init_data`` and allow NULL.

    This mirrors ``upgrade`` in reverse on the ``ScenarioResultEntries`` table,
    again through Alembic batch mode.
    """
    table_name = "ScenarioResultEntries"
    json_type = sa.JSON()

    with op.batch_alter_table(table_name) as batch_op:
        batch_op.alter_column(
            "scenario_identifier",
            nullable=True,
            existing_nullable=False,
            existing_type=json_type,
            new_column_name="scenario_init_data",
        )
70 changes: 37 additions & 33 deletions pyrit/memory/memory_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
EvaluationIdentifier,
MessagePiece,
PromptDataType,
ScenarioEvaluationIdentifier,
ScenarioIdentifier,
ScenarioResult,
ScenarioRunState,
Expand Down Expand Up @@ -1102,7 +1103,8 @@ class ScenarioResultEntry(Base):
scenario_description (str): Optional detailed description of the scenario.
scenario_version (int): Version number of the scenario definition (default: 1).
pyrit_version (str): Version of PyRIT framework used during scenario execution.
scenario_init_data (dict): Optional initialization parameters used to configure the scenario.
scenario_identifier (dict): Canonical scenario identity (class name, version,
techniques, datasets, resolved params, objective target / scorer children).
objective_target_identifier (dict): Identifier for the target being evaluated in the scenario.
objective_scorer_identifier (dict): Optional identifier for the scorer used to evaluate results.
scenario_run_state (str): Current execution state of the scenario
Expand Down Expand Up @@ -1130,7 +1132,9 @@ class ScenarioResultEntry(Base):
scenario_description = mapped_column(Unicode, nullable=True)
scenario_version = mapped_column(INTEGER, nullable=False, default=1)
pyrit_version = mapped_column(String, nullable=False)
scenario_init_data: Mapped[dict[str, Any] | None] = mapped_column(JSON, nullable=True)
#: Canonical scenario identity (class name, version, techniques, datasets,
#: resolved params, objective target / scorer children) with its eval hash.
scenario_identifier: Mapped[dict[str, Any]] = mapped_column(JSON, nullable=False)
objective_target_identifier: Mapped[dict[str, Any]] = mapped_column(JSON, nullable=False)
objective_scorer_identifier: Mapped[dict[str, Any] | None] = mapped_column(JSON, nullable=True)
scenario_run_state: Mapped[str] = mapped_column(String, nullable=False, default="CREATED")
Expand Down Expand Up @@ -1161,24 +1165,32 @@ def __init__(self, *, entry: ScenarioResult) -> None:
entry (ScenarioResult): The scenario result object to convert into a database entry.
"""
self.id = entry.id
self.scenario_name = entry.scenario_identifier.name
self.scenario_description = entry.scenario_identifier.description
self.scenario_version = entry.scenario_identifier.version
self.pyrit_version = entry.scenario_identifier.pyrit_version
self.scenario_init_data = entry.scenario_identifier.init_data
self.scenario_name = entry.scenario_name
self.scenario_description = entry.scenario_description
self.scenario_version = entry.scenario_version
self.pyrit_version = entry.pyrit_version

# Stamp the canonical scenario identifier's eval_hash fresh and store it.
# The denormalized target / scorer columns are populated from the same
# identifier for DB-level filtering (never a value trusted from storage).
scenario_identifier = entry.scenario_identifier.with_eval_hash(
ScenarioEvaluationIdentifier(entry.scenario_identifier).eval_hash
)
self.scenario_identifier = scenario_identifier.model_dump()

# Convert ComponentIdentifier to dict for JSON storage
target_identifier = entry.objective_target_identifier
self.objective_target_identifier = ( # type: ignore[ty:invalid-assignment]
entry.objective_target_identifier.model_dump() if entry.objective_target_identifier else None
target_identifier.model_dump() if target_identifier else None
)
# Always recompute eval_hash before dumping so the stored JSON carries the
# freshly computed value for DB-level filtering (never a value from storage).
if entry.objective_scorer_identifier:
entry.objective_scorer_identifier = entry.objective_scorer_identifier.with_eval_hash(
ScorerEvaluationIdentifier(entry.objective_scorer_identifier).eval_hash
scorer_identifier = entry.objective_scorer_identifier
if scorer_identifier:
scorer_identifier = scorer_identifier.with_eval_hash(
ScorerEvaluationIdentifier(scorer_identifier).eval_hash
)
self.objective_scorer_identifier = (
entry.objective_scorer_identifier.model_dump() if entry.objective_scorer_identifier else None
)
self.objective_scorer_identifier = scorer_identifier.model_dump() if scorer_identifier else None
self.scenario_run_state = entry.scenario_run_state.value
self.labels = entry.labels
self.number_tries = entry.number_tries
Expand Down Expand Up @@ -1211,29 +1223,22 @@ def get_scenario_result(self) -> ScenarioResult:
Returns:
ScenarioResult object with scenario metadata but empty attack_results
"""
# Recreate ScenarioIdentifier with the stored pyrit_version
# The canonical scenario identity (name / version / techniques / datasets /
# params / target / scorer children) is stored as one JSON column and
# reconstructed here as a typed ScenarioIdentifier. eval_hash is recomputed
# on reload (never trusted from storage). The denormalized target / scorer
# columns exist only for DB-level filtering, so they aren't read back here.
stored_version = self.pyrit_version or LEGACY_PYRIT_VERSION
scenario_identifier = ScenarioIdentifier(
name=self.scenario_name,
description=self.scenario_description or "",
scenario_version=self.scenario_version,
init_data=self.scenario_init_data,
pyrit_version=stored_version,
)

# Return empty attack_results - will be populated by memory_interface
attack_results: dict[str, list[AttackResult]] = {}

# Convert dict back to ComponentIdentifier with the stored pyrit_version;
# eval_hash is recomputed on reload via ScorerEvaluationIdentifier.
scorer_identifier = _load_identifier(
self.objective_scorer_identifier,
pyrit_version=stored_version,
eval_identifier_cls=ScorerEvaluationIdentifier,
base_identifier = ComponentIdentifier.model_validate(
{**self.scenario_identifier, "pyrit_version": stored_version}
)
scenario_identifier = ScenarioIdentifier.from_component_identifier(
base_identifier.with_eval_hash(ScenarioEvaluationIdentifier(base_identifier).eval_hash)
)

# Convert dict back to ComponentIdentifier for reconstruction
target_identifier = _load_identifier(self.objective_target_identifier)

# Deserialize display_group_map if stored
display_group_map: dict[str, str] | None = None
Expand All @@ -1243,9 +1248,8 @@ def get_scenario_result(self) -> ScenarioResult:
return ScenarioResult(
id=self.id,
scenario_identifier=scenario_identifier,
objective_target_identifier=target_identifier,
scenario_description=self.scenario_description or "",
attack_results=attack_results,
objective_scorer_identifier=scorer_identifier,
scenario_run_state=ScenarioRunState(self.scenario_run_state),
labels=self.labels or {},
creation_time=self.timestamp,
Expand Down
5 changes: 4 additions & 1 deletion pyrit/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
IdentifierFilter,
IdentifierType,
ObjectiveTargetEvaluationIdentifier,
ScenarioEvaluationIdentifier,
ScenarioIdentifier,
ScorerEvaluationIdentifier,
ScorerIdentifier,
SeedIdentifier,
Expand Down Expand Up @@ -95,7 +97,7 @@
)
from pyrit.models.question_answering import QuestionAnsweringDataset, QuestionAnsweringEntry, QuestionChoice
from pyrit.models.results.attack_result import AttackOutcome, AttackResult, AttackResultT
from pyrit.models.results.scenario_result import ScenarioIdentifier, ScenarioResult, ScenarioRunState
from pyrit.models.results.scenario_result import ScenarioResult, ScenarioRunState
from pyrit.models.results.strategy_result import StrategyResult, StrategyResultT
from pyrit.models.retry_event import RetryEvent
from pyrit.models.score import Score, ScoreType, UnvalidatedScore
Expand Down Expand Up @@ -194,6 +196,7 @@
"ScaleDescription",
"Score",
"ScoreType",
"ScenarioEvaluationIdentifier",
"ScorerEvaluationIdentifier",
"ScorerIdentifier",
"ScenarioIdentifier",
Expand Down
1 change: 0 additions & 1 deletion pyrit/models/catalog/scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ class RegisteredScenario(BaseModel):
)
all_strategies: list[str] = Field(..., description="All available concrete strategy names")
default_datasets: list[str] = Field(..., description="Default dataset names used by the scenario")
max_dataset_size: int | None = Field(None, description="Maximum items per dataset (None means unlimited)")
supported_parameters: list[Parameter] = Field(
default_factory=list, description="Scenario-declared custom parameters"
)
Expand Down
4 changes: 4 additions & 0 deletions pyrit/models/identifiers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
ChildEvalRule,
EvaluationIdentifier,
ObjectiveTargetEvaluationIdentifier,
ScenarioEvaluationIdentifier,
ScorerEvaluationIdentifier,
compute_eval_hash,
compute_inner_attack_eval_hash,
Expand All @@ -31,6 +32,7 @@
from pyrit.models.identifiers.evaluation_markers import EvalMarker, Evaluate, Exclude, Include, Unwrap
from pyrit.models.identifiers.identifier_filters import IdentifierFilter, IdentifierType
from pyrit.models.identifiers.param_markers import Param, ParamMarker
from pyrit.models.identifiers.scenario_identifier import ScenarioIdentifier
from pyrit.models.identifiers.scorer_identifier import ScorerIdentifier
from pyrit.models.identifiers.seed_identifier import SeedIdentifier
from pyrit.models.identifiers.target_identifier import TargetIdentifier
Expand All @@ -57,8 +59,10 @@
"REGISTRY_NAME_PATTERN",
"Param",
"ParamMarker",
"ScenarioEvaluationIdentifier",
"ScorerEvaluationIdentifier",
"ScorerIdentifier",
"ScenarioIdentifier",
"SeedIdentifier",
"snake_case_to_class_name",
"TARGET_EVAL_PARAM_FALLBACKS",
Expand Down
18 changes: 18 additions & 0 deletions pyrit/models/identifiers/evaluation_identifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
* ``AtomicAttackEvaluationIdentifier`` — attack-domain concrete subclass.
* ``ObjectiveTargetEvaluationIdentifier`` — leaf-target subclass used by the
analytics layer to key cached results by behavioral target configuration.
* ``ScenarioEvaluationIdentifier`` — scenario-domain concrete subclass used to
key a scenario run's behavioral identity (for resume drift detection).
"""

from __future__ import annotations
Expand All @@ -30,6 +32,7 @@
from pyrit.models.identifiers.attack_identifier import AttackIdentifier
from pyrit.models.identifiers.component_identifier import ComponentIdentifier, config_hash
from pyrit.models.identifiers.evaluation_markers import EvalMarker, Exclude, Include, Unwrap
from pyrit.models.identifiers.scenario_identifier import ScenarioIdentifier
from pyrit.models.identifiers.scorer_identifier import ScorerIdentifier
from pyrit.models.identifiers.target_identifier import TargetIdentifier

Expand Down Expand Up @@ -531,6 +534,21 @@ class ObjectiveTargetEvaluationIdentifier(EvaluationIdentifier):
EVAL_ROOT: ClassVar[type[ComponentIdentifier] | None] = TargetIdentifier


class ScenarioEvaluationIdentifier(EvaluationIdentifier):
    """
    Evaluation identity for scenarios.

    Rules are derived from ``ScenarioIdentifier``'s field markers: the definition
    ``version`` and resolved ``techniques`` / ``datasets`` feed the hash, the
    resolved scenario ``params`` are included, and the ``objective_target`` /
    ``objective_scorer`` children contribute their full behavioral projection.
    Two runs of the same scenario definition with the same configuration produce
    the same eval hash, which backs resume drift detection.
    """

    # Root identifier type for scenario evaluation; per the class docstring, the
    # eval rules come from this type's field markers.
    # NOTE(review): how EvaluationIdentifier consumes EVAL_ROOT is not visible
    # here -- confirm against the base class.
    EVAL_ROOT: ClassVar[type[ComponentIdentifier] | None] = ScenarioIdentifier


def compute_inner_attack_eval_hash(*, attack: AttackStrategy[Any, Any]) -> str:
"""
Predict the eval hash the executor will stamp on persisted child rows
Expand Down
Loading
Loading