Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,18 @@ repos:
files: ^(doc/.*\.(py|ipynb|md)|doc/myst\.yml)$
pass_filenames: false
additional_dependencies: ['pyyaml']
- id: enforce_alembic_revision_immutability
name: Enforce Alembic Revision Immutability
entry: python ./build_scripts/enforce_alembic_revision_immutability.py
language: python
files: ^pyrit/memory/alembic/versions/.*\.py$
pass_filenames: false
- id: memory-migrations-check
name: Check Memory Migrations
entry: python ./build_scripts/memory_migrations.py check
language: system
pass_filenames: false
Comment thread
behnam-o marked this conversation as resolved.
files: ^pyrit/memory/(memory_models\.py|alembic/.*|migration\.py)$

- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
Expand Down
8 changes: 8 additions & 0 deletions .pyrit_conf_example
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,14 @@ operation: op_trash_panda
# - /path/to/.env
# - /path/to/.env.local

# Schema Migration Check
# ---------------------
# If true, runs database schema migration on startup to ensure the database
# is up to date with the latest PyRIT version.
# Set to false to skip the check (e.g., for read-only access, testing, or
# when managing migrations externally).
check_schema: true

# Silent Mode
# -----------
# If true, suppresses print statements during initialization.
Expand Down
39 changes: 39 additions & 0 deletions build_scripts/enforce_alembic_revision_immutability.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""
Migration history must be immutable. This hook enforces that by preventing deletion or updates to migration scripts.

Checks both staged changes (local pre-commit) and the full branch diff against origin/main (CI).
"""

import subprocess
import sys

_VERSIONS_PATH = "pyrit/memory/alembic/versions/"


def _git(*args: str) -> str:
    """
    Run ``git`` with the given arguments and return its standard output.

    The exit status is intentionally not inspected: a failing command just yields
    whatever it wrote to stdout, with surrounding whitespace removed.
    """
    completed = subprocess.run(["git", *args], capture_output=True, text=True, check=False)
    stdout = completed.stdout
    return stdout.strip()


def _has_non_add_changes(diff_spec: list[str]) -> bool:
    """
    Report whether a diff touches the versions directory with anything but additions.

    ``diff_spec`` is passed straight to ``git diff --name-status``; every non-empty
    output line whose status does not start with ``A`` counts as a violation.
    """
    name_status = _git("diff", "--name-status", *diff_spec, "--", _VERSIONS_PATH)
    for entry in name_status.splitlines():
        if entry and not entry.startswith("A"):
            return True
    return False


def has_revision_violations() -> bool:
    """
    Return True when a migration revision was modified or deleted.

    Staged changes are inspected first (local pre-commit run); after that the diff
    from the merge base with ``origin/main`` up to ``HEAD`` is inspected (CI run).
    """
    if _has_non_add_changes(["--cached"]):
        return True

    merge_base = _git("merge-base", "origin/main", "HEAD")
    if not merge_base:
        # No merge base was reported, so there is no branch diff to inspect.
        return False
    return _has_non_add_changes([f"{merge_base}...HEAD"])


if __name__ == "__main__":
    # Exit non-zero so the pre-commit hook fails when a revision was changed.
    violations_found = has_revision_violations()
    if violations_found:
        print("[ERROR] Migration scripts can only be added, not modified or deleted.")
        sys.exit(1)
90 changes: 90 additions & 0 deletions build_scripts/memory_migrations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import argparse
import sys
import tempfile
from pathlib import Path

from alembic.util.exc import AutogenerateDiffsDetected
from sqlalchemy import create_engine
from sqlalchemy.engine import Engine

from pyrit.memory.migration import check_schema_migrations, generate_schema_migration, run_schema_migrations

# ANSI color codes
_RED = "\033[91m"
_RESET = "\033[0m"


def _print_error(message: str) -> None:
    """Write ``message`` to stderr, wrapped in the red ANSI code and a reset code."""
    colored = f"{_RED}{message}{_RESET}"
    print(colored, file=sys.stderr)


def _create_temp_engine() -> tuple[Engine, Path]:
    """
    Create a temp SQLite database upgraded to head and return engine and path.

    The caller owns both returned resources and must dispose the engine and delete
    the file when done. If creating the engine or running the migrations fails,
    this function cleans up after itself before re-raising, because the caller
    never receives the handles it would need to do so.

    Returns:
        tuple[Engine, Path]: The engine bound to the temp database and the path of
            the database file.
    """
    # delete=False: only the file name is needed; SQLite reopens the path itself.
    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
        tmp_path = Path(tmp.name)

    engine: Engine | None = None
    try:
        engine = create_engine(f"sqlite:///{tmp_path}")
        run_schema_migrations(engine=engine)
    except BaseException:
        # Cleanup only; the original exception is always re-raised unchanged.
        if engine is not None:
            engine.dispose()
        tmp_path.unlink(missing_ok=True)
        raise
    return engine, tmp_path


def _cmd_generate(*, message: str, force: bool = False) -> None:
    """
    Generate a new Alembic revision from model changes.

    Args:
        message (str): Migration message forwarded to the generator.
        force (bool): Forwarded to the generator; the CLI describes it as generating
            a migration even if no changes are detected.

    Raises:
        SystemExit: With exit code 1 when generation raises ``RuntimeError``.
    """
    engine, tmp_path = _create_temp_engine()
    try:
        generate_schema_migration(engine=engine, message=message, force=force)
    except RuntimeError as exc:
        _print_error(str(exc))
        raise SystemExit(1) from exc
    else:
        print("Migration file generated. Review it carefully before committing.")
    finally:
        # Always release the throwaway database, whatever the outcome.
        engine.dispose()
        tmp_path.unlink(missing_ok=True)


def _cmd_check() -> None:
    """
    Verify all migrations apply cleanly and schema matches models.

    Raises:
        SystemExit: With exit code 1 when Alembic reports autogenerate diffs.
    """
    engine, tmp_path = _create_temp_engine()
    try:
        check_schema_migrations(engine=engine)
    except AutogenerateDiffsDetected as exc:
        failure = f"Migration check failed. Run 'generate' to create a migration. Error: {exc}"
        _print_error(failure)
        raise SystemExit(1) from exc
    finally:
        # Always release the throwaway database, whatever the outcome.
        engine.dispose()
        tmp_path.unlink(missing_ok=True)


def _build_parser() -> argparse.ArgumentParser:
"""Build the CLI argument parser."""
parser = argparse.ArgumentParser(
description="PyRIT memory migration tool. Generate and validate migrations based on the current memory models."
)
sub = parser.add_subparsers(dest="command", required=True)

gen = sub.add_parser("generate", help="Generate a new migration from model changes.")
gen.add_argument("-m", "--message", required=True, help="Migration message.")
gen.add_argument("--force", action="store_true", help="Generate migration even if no changes detected.")

sub.add_parser("check", help="Verify all migrations apply cleanly and add up to the current memory models.")

return parser


def main() -> int:
    """
    Parse the command line and run the requested migration command.

    Returns:
        int: Always 0; the command handlers raise ``SystemExit`` themselves on failure.
    """
    args = _build_parser().parse_args()
    command = args.command

    if command == "check":
        _cmd_check()
        return 0

    if command == "generate":
        _cmd_generate(message=args.message, force=args.force)

    return 0


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
97 changes: 97 additions & 0 deletions doc/contributing/11_memory_models.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# Memory Models & Migrations

This guide covers how to work with PyRIT's memory models — where they live, how to add or update them, and how the migration system works.

## Where Things Live

| What | Path |
|---|---|
| ORM models (SQLAlchemy) | `pyrit/memory/memory_models.py` |
| Domain objects they map to | `pyrit/models/` (e.g. `MessagePiece`, `Score`, `Seed`, `AttackResult`, `ScenarioResult`) |
| Alembic migration environment | `pyrit/memory/alembic/env.py` |
| Migration revisions | `pyrit/memory/alembic/versions/` |
| Migration helpers | `pyrit/memory/migration.py` |
| CLI migration tool | `build_scripts/memory_migrations.py` |
| Schema diagram | `doc/code/memory/10_schema_diagram.md` |

## Current Models

All models inherit from the SQLAlchemy `Base` declarative class and live in `memory_models.py`:

- **`PromptMemoryEntry`** — prompt/response data (`PromptMemoryEntries` table)
- **`ScoreEntry`** — evaluation results (`ScoreEntries` table)
- **`EmbeddingDataEntry`** — embeddings for semantic search (`EmbeddingData` table)
- **`SeedEntry`** — dataset prompts/templates (`SeedPromptEntries` table)
- **`AttackResultEntry`** — attack execution results (`AttackResultEntries` table)
- **`ScenarioResultEntry`** — scenario execution metadata (`ScenarioResultEntries` table)

Each entry model has a corresponding domain object and conversion methods (e.g. `PromptMemoryEntry.__init__(entry: MessagePiece)` and `get_message_piece()`).

## Adding or Updating a Model

### 1. Edit the model

Make your changes in `pyrit/memory/memory_models.py`. Follow these conventions:

- Use `mapped_column()` with explicit types.
- Use `CustomUUID` for all UUID columns (handles cross-database compatibility).
- Add foreign keys where relationships exist.
- Include `pyrit_version` on new entry models.

### 2. Generate a migration

```bash
python build_scripts/memory_migrations.py generate -m "short description of change"
```

This creates a new revision file under `pyrit/memory/alembic/versions/`. **Review the generated file carefully** — auto-generated migrations may need manual adjustments (e.g. for data migrations or default values).

### 3. Validate the migration

```bash
python build_scripts/memory_migrations.py check
```

This verifies that the schema produced by running all migrations matches the current models. The check runs both in the `memory-migrations-check` pre-commit hook (see below) and in CI.

### 4. Update the schema diagram

If you changed the schema in a meaningful way (added a table, added a foreign key, etc.), update the Mermaid diagram in `doc/code/memory/10_schema_diagram.md`.

## How Migrations Run at Startup

When `initialize_pyrit_async()` is called with `check_schema=True` (the default), migrations run automatically:

```
initialize_pyrit_async()
→ memory._ensure_schema_is_current() # pyrit/memory/memory_interface.py
→ run_schema_migrations(engine=...) # pyrit/memory/migration.py
→ alembic upgrade head
```

This means any new migration you add will be applied automatically the next time a user initializes PyRIT. The behavior depends on the database state:

| Database state | What happens |
|---|---|
| **Fresh (no tables)** | All migrations apply from scratch |
| **Already versioned** | Only unapplied migrations run (idempotent) |
| **Legacy (tables exist, no version tracking)** | Validates schema matches models, stamps current version, then upgrades. Raises `RuntimeError` on mismatch to prevent data corruption |

Migrations run inside a transaction (`engine.begin()`), so a failed migration rolls back cleanly. The version tracking table is `pyrit_memory_alembic_version`.

Users can skip this check by passing `check_schema=False` to `initialize_pyrit_async()`.

## Important Rules

### Migration revisions are immutable

Once a migration revision is committed, it **must not be modified or deleted**. This is enforced by a pre-commit hook (`enforce_alembic_revision_immutability`). If you need to fix a migration, create a new revision instead.

### Pre-commit hooks

Two hooks run automatically when you touch memory-related files:

1. **`enforce_alembic_revision_immutability`** — blocks modifications/deletions to existing revision files.
2. **`memory-migrations-check`** — runs `memory_migrations.py check` to verify the schema is in sync.

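Each hook has its own trigger: `enforce_alembic_revision_immutability` runs on changes to Python files under `pyrit/memory/alembic/versions/`, while `memory-migrations-check` runs on changes to `pyrit/memory/memory_models.py`, `pyrit/memory/migration.py`, and files under `pyrit/memory/alembic/`.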
1 change: 1 addition & 0 deletions doc/myst.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ project:
- file: contributing/8_pre_commit.md
- file: contributing/9_exception.md
- file: contributing/10_release_process.md
- file: contributing/11_memory_models.md
- file: gui/0_gui.md
- file: scanner/0_scanner.md
children:
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ classifiers = [
requires-python = ">=3.10, <3.14"
dependencies = [
"aiofiles>=24,<25",
"alembic>=1.16.0",
"appdirs>=1.4.0",
"art>=6.5.0",
"av>=14.0.0",
Expand Down Expand Up @@ -199,6 +200,8 @@ include = ["pyrit", "pyrit.*"]
[tool.setuptools.package-data]
pyrit = [
"backend/frontend/**/*",
"memory/alembic/**/*",
"memory/alembic.ini",
"py.typed"
]

Expand Down
24 changes: 24 additions & 0 deletions pyrit/memory/alembic/env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from alembic import context
from sqlalchemy.engine import Connection

from pyrit.memory.memory_models import Base
from pyrit.memory.migration import PYRIT_MEMORY_ALEMBIC_VERSION_TABLE

config = context.config
target_metadata = Base.metadata

# Migrations only run on a connection that the caller placed in
# ``config.attributes``; this script never opens one itself.
connection: Connection | None = config.attributes.get("connection")
if connection is None:
    raise RuntimeError("No connection found for Alembic migration")

context.configure(
    version_table=PYRIT_MEMORY_ALEMBIC_VERSION_TABLE,
    compare_type=True,
    target_metadata=target_metadata,
    connection=connection,
)

with context.begin_transaction():
    context.run_migrations()
32 changes: 32 additions & 0 deletions pyrit/memory/alembic/script.py.mako
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""
${message}.

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""

from collections.abc import Sequence

import sqlalchemy as sa
from alembic import op
${imports if imports else ""}

# revision identifiers, used by Alembic.
## down_revision renders as a tuple of revisions for merge revisions, so its
## annotation has to admit a sequence as well as a single id or None.
revision: str = "${up_revision}"
down_revision: str | Sequence[str] | None = ${repr(down_revision).replace("'", '"')}
branch_labels: str | Sequence[str] | None = ${repr(branch_labels).replace("'", '"')}
depends_on: str | Sequence[str] | None = ${repr(depends_on).replace("'", '"')}


def upgrade() -> None:
"""Apply this schema upgrade."""
${upgrades if upgrades else "pass"}


def downgrade() -> None:
"""Revert this schema upgrade."""
${downgrades if downgrades else "pass"}
Loading
Loading