Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/code/executor/attack/0_attack.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ flowchart LR
S_c["CrescendoAttack"]
S_r["RedTeamingAttack"]
s_t["TreeOfAttacksWithPruningAttack (aka TAPAttack)"]
s_pair["PAIRAttack"]
S_multi["MultiTurnAttackStrategy (ABC)"]
S_seq["SequentialAttack"]
S_compound["Compound Attacks"]
Expand All @@ -59,6 +60,8 @@ flowchart LR
S_single --> S_psa
S_multi --> S_c
S_multi --> S_r
S_multi --> s_t
s_t --> s_pair
S_compound --> S_seq

```
Expand Down
10 changes: 9 additions & 1 deletion doc/code/executor/attack/tap_attack.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,13 @@
"exploration. The tree visualization in the result provides insights into the attack's\n",
"decision-making process.\n",
"\n",
"PAIR (Prompt Automatic Iterative Refinement) [@chao2023pair] is the structural special\n",
"case of TAP with no tree expansion and no off-topic pruning -- i.e. parallel single-branch\n",
"iterative refinement. PyRIT exposes it as `PAIRAttack`, a thin `TAPAttack` subclass that\n",
"hardcodes `branching_factor=1` and `on_topic_checking_enabled=False`. Everything else\n",
"(target, scoring, converters, `tree_width`, `tree_depth`) is configured exactly the same\n",
"way as below.\n",
"\n",
"The results and intermediate interactions will be saved to memory according to the environment settings. For details, see the [Memory Configuration Guide](../../memory/0_memory.md)."
]
},
Expand Down Expand Up @@ -603,7 +610,8 @@
],
"metadata": {
"jupytext": {
"cell_metadata_filter": "-all"
"cell_metadata_filter": "-all",
"main_language": "python"
},
"language_info": {
"codemirror_mode": {
Expand Down
7 changes: 7 additions & 0 deletions doc/code/executor/attack/tap_attack.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@
# exploration. The tree visualization in the result provides insights into the attack's
# decision-making process.
#
# PAIR (Prompt Automatic Iterative Refinement) [@chao2023pair] is the structural special
# case of TAP with no tree expansion and no off-topic pruning -- i.e. parallel single-branch
# iterative refinement. PyRIT exposes it as `PAIRAttack`, a thin `TAPAttack` subclass that
# hardcodes `branching_factor=1` and `on_topic_checking_enabled=False`. Everything else
# (target, scoring, converters, `tree_width`, `tree_depth`) is configured exactly the same
# way as below.
#
# The results and intermediate interactions will be saved to memory according to the environment settings. For details, see the [Memory Configuration Guide](../../memory/0_memory.md).
# %%
import os
Expand Down
58 changes: 30 additions & 28 deletions pyrit/executor/attack/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
MultiPromptSendingAttackParameters,
MultiTurnAttackContext,
MultiTurnAttackStrategy,
PAIRAttack,
RedTeamingAttack,
RTASystemPromptPaths,
TAPAttack,
Expand Down Expand Up @@ -63,49 +64,50 @@
from pyrit.output.attack_result.pretty import PrettyAttackResultMemoryPrinter as ConsoleAttackResultPrinter

__all__ = [
"AttackStrategy",
"AttackAdversarialConfig",
"AttackContext",
"AttackConverterConfig",
"AttackExecutor",
"AttackExecutorResult",
"AttackParameters",
"AttackResultPrinter",
"AttackScoringConfig",
"AttackStrategy",
"ChunkedRequestAttack",
"ChunkedRequestAttackContext",
"ConsoleAttackResultPrinter",
"ContextComplianceAttack",
"ConversationManager",
"ConversationSession",
"ConversationState",
"CrescendoAttack",
"CrescendoAttackContext",
"CrescendoAttackResult",
"FlipAttack",
"ManyShotJailbreakAttack",
"MarkdownAttackResultPrinter",
"MultiPromptSendingAttack",
"MultiPromptSendingAttackParameters",
"TAPAttack",
"TreeOfAttacksWithPruningAttack",
"TAPAttackContext",
"TAPAttackResult",
"TAPSystemPromptPaths",
"SingleTurnAttackStrategy",
"SingleTurnAttackContext",
"MultiTurnAttackContext",
"MultiTurnAttackStrategy",
"PAIRAttack",
"PrependedConversationConfig",
"PromptSendingAttack",
"FlipAttack",
"ContextComplianceAttack",
"ManyShotJailbreakAttack",
"RTASystemPromptPaths",
"RedTeamingAttack",
"RolePlayAttack",
"RolePlayPaths",
"SkeletonKeyAttack",
"ConversationSession",
"MultiTurnAttackStrategy",
"MultiTurnAttackContext",
"RedTeamingAttack",
"RTASystemPromptPaths",
"ConsoleAttackResultPrinter",
"MarkdownAttackResultPrinter",
"AttackResultPrinter",
"AttackConverterConfig",
"AttackScoringConfig",
"AttackAdversarialConfig",
"ConversationManager",
"ConversationState",
"AttackExecutor",
"AttackExecutorResult",
"PrependedConversationConfig",
"SequenceCompletionPolicy",
"SequentialAttack",
"SequentialAttackResult",
"SequentialChildAttack",
"SingleTurnAttackContext",
"SingleTurnAttackStrategy",
"SkeletonKeyAttack",
"TAPAttack",
"TAPAttackContext",
"TAPAttackResult",
"TAPSystemPromptPaths",
"TreeOfAttacksWithPruningAttack",
"generate_simulated_conversation_async",
]
20 changes: 11 additions & 9 deletions pyrit/executor/attack/multi_turn/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
MultiTurnAttackContext,
MultiTurnAttackStrategy,
)
from pyrit.executor.attack.multi_turn.pair import PAIRAttack
from pyrit.executor.attack.multi_turn.red_teaming import RedTeamingAttack, RTASystemPromptPaths
from pyrit.executor.attack.multi_turn.simulated_conversation import (
generate_simulated_conversation_async,
Expand All @@ -27,22 +28,23 @@
)

__all__ = [
"ConversationSession",
"MultiTurnAttackContext",
"MultiTurnAttackStrategy",
"ChunkedRequestAttack",
"ChunkedRequestAttackContext",
"MultiPromptSendingAttack",
"MultiPromptSendingAttackParameters",
"ConversationSession",
"CrescendoAttack",
"CrescendoAttackContext",
"CrescendoAttackResult",
"RedTeamingAttack",
"MultiPromptSendingAttack",
"MultiPromptSendingAttackParameters",
"MultiTurnAttackContext",
"MultiTurnAttackStrategy",
"PAIRAttack",
"RTASystemPromptPaths",
"generate_simulated_conversation_async",
"TreeOfAttacksWithPruningAttack",
"RedTeamingAttack",
"TAPAttack",
"TAPAttackResult",
"TAPAttackContext",
"TAPAttackResult",
"TAPSystemPromptPaths",
"TreeOfAttacksWithPruningAttack",
"generate_simulated_conversation_async",
]
108 changes: 108 additions & 0 deletions pyrit/executor/attack/multi_turn/pair.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""
Prompt Automatic Iterative Refinement (PAIR) attack.

Implements PAIR (Chao et al. 2023, arXiv:2310.08419) as a thin subclass of
TreeOfAttacksWithPruningAttack with the PAIR-definitional structural
parameters (no tree branching, no off-topic pruning) hardcoded.
"""

import logging

from pyrit.common.apply_defaults import REQUIRED_VALUE, apply_defaults
from pyrit.executor.attack.component import PrependedConversationConfig
from pyrit.executor.attack.core.attack_config import (
AttackAdversarialConfig,
AttackConverterConfig,
)
from pyrit.executor.attack.multi_turn.tree_of_attacks import (
TAPAttackScoringConfig,
TreeOfAttacksWithPruningAttack,
)
from pyrit.prompt_normalizer import PromptNormalizer
from pyrit.prompt_target import PromptTarget

logger = logging.getLogger(__name__)


class PAIRAttack(TreeOfAttacksWithPruningAttack):
    """
    Run the Prompt Automatic Iterative Refinement (PAIR) attack [@chao2023pair]
    on top of the Tree of Attacks with Pruning engine [@mehrotra2023tap].

    Structurally, PAIR is TAP with two knobs pinned: every node produces exactly
    one child (branching_factor=1, so the tree never expands) and the off-topic
    check is switched off (on_topic_checking_enabled=False). What is left is a
    set of parallel "streams", each refining its own prompt turn after turn.
    This subclass pins those two values and forwards everything else to
    TreeOfAttacksWithPruningAttack, reusing TAP's adversarial system prompt.

    The caller-facing configuration (target, scoring, converters, width and
    depth budget) mirrors TAP, including TAP's defaults for tree_width (how
    many streams run in parallel) and tree_depth (how many refinement
    iterations each stream gets).

    References:
        Jailbreaking Black Box Large Language Models in Twenty Queries
        [@chao2023pair]

        Tree of Attacks: Jailbreaking Black-Box LLMs Automatically
        [@mehrotra2023tap]
    """

    @apply_defaults
    def __init__(
        self,
        *,
        objective_target: PromptTarget = REQUIRED_VALUE,  # type: ignore[ty:invalid-parameter-default]
        attack_adversarial_config: AttackAdversarialConfig,
        attack_converter_config: AttackConverterConfig | None = None,
        attack_scoring_config: TAPAttackScoringConfig | None = None,
        prompt_normalizer: PromptNormalizer | None = None,
        tree_width: int = 3,
        tree_depth: int = 5,
        desired_response_prefix: str = "Sure, here is",
        batch_size: int = 10,
        prepended_conversation_config: PrependedConversationConfig | None = None,
    ) -> None:
        """
        Build a PAIR attack by configuring TAP with PAIR's fixed structure.

        Args:
            objective_target (PromptTarget): The system under attack.
            attack_adversarial_config (AttackAdversarialConfig): Settings for the
                adversarial chat that writes and refines the prompts.
            attack_converter_config (AttackConverterConfig | None): Converter
                settings for the attack. Defaults to None.
            attack_scoring_config (TAPAttackScoringConfig | None): Scoring settings.
                The objective scorer has to be a FloatScaleThresholdScorer; when
                this is omitted, a FloatScaleThresholdScorer around
                SelfAskScaleScorer with threshold 0.7 is created. Defaults to None.
            prompt_normalizer (PromptNormalizer | None): Prompt normalizer to use.
                Defaults to None.
            tree_width (int): How many streams run in parallel (N in the PAIR
                paper). Defaults to 3.
            tree_depth (int): Upper bound on refinement iterations for each stream
                (K in the PAIR paper). Defaults to 5.
            desired_response_prefix (str): Prefix a successful response is expected
                to start with. Defaults to "Sure, here is".
            batch_size (int): How many nodes are processed in parallel per batch.
                Defaults to 10.
            prepended_conversation_config (PrependedConversationConfig | None):
                Settings for handling a prepended conversation. Defaults to None.
        """
        super().__init__(
            # The two values that make this PAIR rather than general TAP; they are
            # deliberately not exposed as constructor parameters.
            branching_factor=1,
            on_topic_checking_enabled=False,
            # Stream budget: number of parallel streams and iterations per stream.
            tree_width=tree_width,
            tree_depth=tree_depth,
            batch_size=batch_size,
            # Everything below is passed straight through to TAP.
            objective_target=objective_target,
            attack_adversarial_config=attack_adversarial_config,
            attack_converter_config=attack_converter_config,
            attack_scoring_config=attack_scoring_config,
            prompt_normalizer=prompt_normalizer,
            desired_response_prefix=desired_response_prefix,
            prepended_conversation_config=prepended_conversation_config,
        )
6 changes: 6 additions & 0 deletions pyrit/scenario/core/scenario_techniques.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from pyrit.executor.attack import (
ContextComplianceAttack,
ManyShotJailbreakAttack,
PAIRAttack,
PromptSendingAttack,
RedTeamingAttack,
RolePlayAttack,
Expand Down Expand Up @@ -78,6 +79,11 @@
attack_class=TreeOfAttacksWithPruningAttack,
strategy_tags=["core", "multi_turn"],
),
AttackTechniqueSpec(
name="pair",
attack_class=PAIRAttack,
strategy_tags=["core", "multi_turn"],
),
AttackTechniqueSpec(
name="crescendo_simulated",
attack_class=PromptSendingAttack,
Expand Down
Loading
Loading