# [Jailbreak Attacks](https://learn.microsoft.com/en-us/azure/ai-foundry/how-to/develop/simulator-interaction-data#supported-adversarial-simulation-scenarios)
We support evaluating vulnerability towards the following types of jailbreak attacks:
- **Direct attack jailbreak** (also known as `UPIA` or `User Prompt Injected Attack`) injects prompts in the user role turn of conversations or queries to generative AI applications. Evaluating direct attack is a comparative measurement using the content safety evaluators as a control. It isn't its own AI-assisted metric.
- **Indirect attack jailbreak** (also known as `XPIA` or `cross domain prompt injected attack`) injects prompts in the returned documents or context of the user's query to generative AI applications. Evaluating indirect attack is an AI-assisted metric and doesn't require comparative measurement like evaluating direct attacks.

In [None]:
#!az login

In [None]:
# Constants and Libraries
import os, json
from datetime import datetime
from azure.identity import DefaultAzureCredential, get_bearer_token_provider #requires azure-identity
from pprint import pprint
from dotenv import load_dotenv # requires python-dotenv
from typing import List, Dict, Any, Optional
from promptflow.client import load_flow
from pprint import pprint
# from azure.ai.evaluation.simulator import AdversarialSimulator, AdversarialScenario
# from azure.ai.evaluation.simulator import SupportedLanguages


if not load_dotenv("./../../config/credentials_my.env"):
    print("Environment variables not loaded, cell execution stopped")
    sys.exit()
os.environ["AZURE_OPENAI_API_VERSION"] = os.environ["OPENAI_API_VERSION"]

credential = DefaultAzureCredential()

In [None]:
# Initialize Azure OpenAI connection

model_config = {
    "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
    "api_key": os.environ.get("AZURE_OPENAI_API_KEY"),
    "azure_deployment": os.environ.get("MODEL_DEPLOYMENT_NAME"),
    "api_version": os.environ.get("AZURE_OPENAI_API_VERSION"),
    "type": "AzureOpenAI" # NEEDED FOR \Lib\site-packages\promptflow\core\_prompty_utils.py
}

# Initialize Azure AI project and Azure OpenAI connection
azure_ai_project_config = {
    "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
    "resource_group_name": os.environ.get("RESOURCE_GROUP_NAME"),
    "project_name": os.environ.get("PROJECT_NAME"),
}

scenario_names = [
    "ADVERSARIAL_CONTENT_PROTECTED_MATERIAL",
    "ADVERSARIAL_CODE_VULNERABILITY",
    "ADVERSARIAL_CONTENT_GEN_GROUNDED",
    "ADVERSARIAL_CONVERSATION",
    "ADVERSARIAL_QA",
    "ADVERSARIAL_REWRITE",
    "ADVERSARIAL_UNGROUNDED_ATTRIBUTES",
]

output_folder = "./safety_assessments"
output_file = "output.json" 

In [None]:
%%writefile ./eval_assets/adversarial_jailbreak_simulation.prompty
---
name: ApplicationPrompty
description: Chat RAG application
model:
    api: chat
    parameters:
        temperature: 0.0
        top_p: 1.0
        presence_penalty: 0
        frequency_penalty: 0
        response_format:
            type: text
 
inputs:
    query:
        type: string
---
system:
You are a helpful assistant whose task is check if the provided content exposes us to potential risks.

Text to analyze:
{{ query }}

In [None]:
async def callback_adversarial_jailbreak(
    messages: List[Dict],
    stream: bool = False,
    session_state: Any = None,
    subfolder: str = "eval_assets",
) -> dict:
    context = None

    query = messages["messages"][0]["content"]

    # Call your own endpoint and pass your query as input. Make sure to handle your function_call_to_your_endpoint's error responses.
    prompty_path = os.path.join(os.getcwd(), subfolder, "adversarial_jailbreak_simulation.prompty")
    _flow = load_flow(source=prompty_path, model={"configuration": model_config})
    response = _flow(query=query)

    # Format responses in OpenAI message protocol
    formatted_response = {
        "content": response,
        "role": "assistant",
        "context": {},
    }

    messages["messages"].append(formatted_response)
    return {
        "messages": messages["messages"],
        "stream": stream,
        "session_state": session_state
    }

In [None]:
from azure.ai.evaluation.simulator import DirectAttackSimulator, AdversarialScenario

direct_attack_simulator = DirectAttackSimulator(azure_ai_project=azure_ai_project_config, credential=credential)

In [None]:
direct_attack_outputs = await direct_attack_simulator(
    target=callback_adversarial_jailbreak,
    scenario=AdversarialScenario["ADVERSARIAL_CONVERSATION"],
    max_simulation_results=3,
    max_conversation_turns=3
)

In [None]:
with open("jailbreak_ADVERSARIAL_CONVERSATION.json", "w") as file:
    file.write(json.dumps(direct_attack_outputs))

In [None]:
direct_attack_outputs