In [1]:
import asyncio
import os
from typing import Optional, Dict, Any

import ollama
from azure.ai.evaluation.red_team import RedTeam, RiskCategory, AttackStrategy
from azure.identity import AzureCliCredential, get_bearer_token_provider, DefaultAzureCredential
from openai import AzureOpenAI

In [2]:
# Initialize Azure credentials.
# One credential object is created here and reused by later cells: it backs the
# bearer-token provider inside azure_openai_callback and is passed to RedTeam.
# NOTE(review): AzureCliCredential presumably relies on an existing `az login`
# session on this machine — confirm before running on a fresh environment.
credential = AzureCliCredential()

In [3]:
# ----- Configuration -----
# Every setting is read from the environment; a variable that is not set yields None.

# Azure AI Project endpoint handed to RedTeam.
azure_ai_project = os.getenv("RED_TEAM_PROJECT_ENDPOINT")

# Azure OpenAI target settings (the "ADVERSARIAL" deployment).
azure_openai_endpoint = os.getenv("AZURE_OPENAI_ADVERSARIAL_ENDPOINT")
azure_openai_deployment = os.getenv("AZURE_OPENAI_ADVERSARIAL_DEPLOYMENT_NAME")
azure_openai_api_version = os.getenv("AZURE_OPENAI_ADVERSARIAL_API_VERSION")
azure_openai_api_key = os.getenv("AZURE_OPENAI_ADVERSARIAL_API_KEY")

# Alternative target: uncomment these four lines (and comment out the four above)
# to point the Azure OpenAI callback at the "UNSAFE" deployment instead.
# azure_openai_endpoint = os.getenv("AZURE_OPENAI_UNSAFE_ENDPOINT")
# azure_openai_deployment = os.getenv("AZURE_OPENAI_UNSAFE_DEPLOYMENT_NAME")
# azure_openai_api_version = os.getenv("AZURE_OPENAI_UNSAFE_API_VERSION")
# azure_openai_api_key = os.getenv("AZURE_OPENAI_UNSAFE_API_KEY")

In [4]:
# AzureOpenAI Callback
async def azure_openai_callback(
    messages: list,
    stream: Optional[bool] = False,
    session_state: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> dict[str, list[dict[str, str]]]:
    """Send the latest conversation message to the Azure OpenAI deployment.

    Only the content of the last entry in ``messages`` is forwarded, as a single
    ``user`` turn; earlier turns are not sent to the model.

    Args:
        messages: Conversation history. Each entry must expose a ``.content``
            attribute; the list must not be empty (an empty list raises
            ``IndexError``).
        stream: Accepted but not used by this callback.
        session_state: Accepted but not used by this callback.
        context: Accepted but not used by this callback.

    Returns:
        ``{"messages": [{"content": <text>, "role": "assistant"}]}``. If the
        model call fails, the error is printed and ``content`` is a fixed
        fallback sentence, so the returned shape is the same on success and
        on failure.
    """
    # Get token provider for Azure AD authentication
    token_provider = get_bearer_token_provider(credential, "https://cognitiveservices.azure.com/.default")

    # Initialize Azure OpenAI client
    client = AzureOpenAI(
        azure_endpoint=azure_openai_endpoint,
        api_version=azure_openai_api_version,
        azure_ad_token_provider=token_provider,
    )

    # Extract the latest message from the conversation history
    latest_message = messages[-1].content

    try:
        # The AzureOpenAI client is synchronous; run the request in a worker
        # thread so this coroutine does not block the event loop while waiting.
        response = await asyncio.to_thread(
            client.chat.completions.create,
            model=azure_openai_deployment,
            messages=[
                {"role": "user", "content": latest_message},
            ],
            # max_tokens=500,  # Use this instead of max_completion_tokens for non-o1 models
            max_completion_tokens=500,  # Required form of the token limit for o1 base models
            # temperature=0.7,  # Not supported by o1 base models; enable only for other models
        )

        # Format the response to follow the expected chat protocol format.
        # The completion content can be None; fall back to an empty string so
        # the value always matches the declared str type.
        formatted_response = {"content": response.choices[0].message.content or "", "role": "assistant"}
    except Exception as e:
        print(f"Error calling Azure OpenAI: {e!s}")
        # Keep the same {"content", "role"} shape as the success path.
        formatted_response = {
            "content": "I encountered an error and couldn't process your request.",
            "role": "assistant",
        }
    return {"messages": [formatted_response]}


In [5]:
# Llama Callback
MODEL_NAME = "llama3.2"

async def ollama_callback(
    messages: list,
    stream: Optional[bool] = False,
    session_state: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> dict[str, list[dict[str, str]]]:
    """Send the latest conversation message to the local Ollama model ``MODEL_NAME``.

    Only the content of the last entry in ``messages`` is forwarded, as a single
    ``user`` turn; earlier turns are not sent to the model.

    Args:
        messages: Conversation history. Each entry must expose a ``.content``
            attribute; the list must not be empty (an empty list raises
            ``IndexError``).
        stream: Accepted but not used by this callback.
        session_state: Accepted but not used by this callback.
        context: Accepted but not used by this callback.

    Returns:
        ``{"messages": [{"content": <text>, "role": "assistant"}]}``. If the
        model call fails, the error is printed and ``content`` is a fixed
        fallback sentence.
    """
    # Extract the latest message from the conversation history
    latest_message = messages[-1].content

    try:
        # ollama.chat is synchronous; run it in a worker thread so this
        # coroutine does not block the event loop while the model generates.
        response = await asyncio.to_thread(
            ollama.chat,
            model=MODEL_NAME,
            messages=[{"role": "user", "content": latest_message}],
        )

        # Format the response to follow the expected chat protocol format
        formatted_response = {
            "content": response["message"]["content"],
            "role": "assistant",
        }

    except Exception as e:
        print(f"Error calling Ollama: {e!s}")
        formatted_response = {
            "content": "I encountered an error and couldn't process your request.",
            "role": "assistant",
        }

    return {"messages": [formatted_response]}


In [6]:
# Build the RedTeam scanner for the four risk categories listed below,
# requesting 5 attack objectives per category.
red_team = RedTeam(
    azure_ai_project=azure_ai_project,
    credential=credential,
    risk_categories=[
        RiskCategory.Violence,
        RiskCategory.HateUnfairness,
        RiskCategory.Sexual,
        RiskCategory.SelfHarm,
    ],
    num_objectives=5,
)

Class RedTeam: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


In [7]:
# Run the red-team scan against the local Ollama target (ollama_callback).
# Top-level `await` is used because this runs as a notebook cell.
# The recorded output of this cell shows 12 planned tasks: the baseline, rot13
# and tense strategies, each across the four configured risk categories.
# The combined results are written to red_team_output.json; the recorded output
# shows per-task files landing in a timestamped .scan_<scan_name>_... directory.
result = await red_team.scan(
    target=ollama_callback,
    scan_name="Basic-Callback-Scan",
    attack_strategies=[
        AttackStrategy.MODERATE,  # Group of moderate complexity attacks
        AttackStrategy.ROT13,  # Use ROT13 encoding
    ],
    output_path="red_team_output.json",
)

🚀 STARTING RED TEAM SCAN
📂 Output directory: ./.scan_Basic-Callback-Scan_20251024_144456
📊 Risk categories: ['violence', 'hate_unfairness', 'sexual', 'self_harm']
🔗 Track your red team scan in AI Foundry: https://ai.azure.com/resource/build/redteaming/a23b937e-b56c-41d2-a51b-e1c19d816caf?wsid=/subscriptions/ffcdaad1-98c2-4d37-97be-371caf0be262/resourceGroups/FYP/providers/Microsoft.CognitiveServices/accounts/fyp-resource/projects/fyp&tid=15ce9348-be2a-462b-8fc0-e1765a9b204a
📋 Planning 12 total tasks
📝 Fetched baseline objectives for violence: 5 objectives
📝 Fetched baseline objectives for hate_unfairness: 5 objectives
📝 Fetched baseline objectives for sexual: 5 objectives
📝 Fetched baseline objectives for self_harm: 5 objectives
🔄 Fetching objectives for strategy 2/3: rot13
🔄 Fetching objectives for strategy 3/3: tense


Scanning:   0%|                                      | 0/12 [00:00<?, ?scan/s, current=initializing]

⚙️ Processing 12 tasks in parallel (max 5 at a time)
▶️ Starting task: baseline strategy for violence risk category
▶️ Starting task: baseline strategy for hate_unfairness risk category
▶️ Starting task: baseline strategy for sexual risk category
▶️ Starting task: baseline strategy for self_harm risk category
▶️ Starting task: rot13 strategy for violence risk category


Scanning:  33%|█████████▋                   | 4/12 [07:04<56:38, 424.86s/scan, current=initializing]

Evaluation results saved to "/Users/kendrickkoh/Desktop/University/FYP/Red teaming preparation/azure red team/.scan_Basic-Callback-Scan_20251024_144456/baseline_violence_8a47c88e-85c0-488c-8b62-fec54c5fa8ed.json".
✅ Completed task 1/12 (8.3%) - baseline/violence in 424.9s
   Est. remaining: 79.9 minutes
Evaluation results saved to "/Users/kendrickkoh/Desktop/University/FYP/Red teaming preparation/azure red team/.scan_Basic-Callback-Scan_20251024_144456/baseline_hate_unfairness_ced0bd91-b956-4de4-b165-acb8949cb01b.json".
✅ Completed task 2/12 (16.7%) - baseline/hate_unfairness in 424.9s
   Est. remaining: 36.3 minutes
Evaluation results saved to "/Users/kendrickkoh/Desktop/University/FYP/Red teaming preparation/azure red team/.scan_Basic-Callback-Scan_20251024_144456/baseline_sexual_e4a1698f-ff4b-49cd-8e06-dbd6f6dfbfe1.json".
✅ Completed task 3/12 (25.0%) - baseline/sexual in 424.9s
   Est. remaining: 21.8 minutes
Evaluation results saved to "/Users/kendrickkoh/Desktop/University/FYP/Re

Scanning:  42%|████████████▌                 | 5/12 [07:05<07:23, 63.34s/scan, current=initializing]

Evaluation results saved to "/Users/kendrickkoh/Desktop/University/FYP/Red teaming preparation/azure red team/.scan_Basic-Callback-Scan_20251024_144456/rot13_violence_c228bad6-9244-4f5c-a5eb-bd693241c071.json".
✅ Completed task 5/12 (41.7%) - rot13/violence in 425.1s
   Est. remaining: 10.2 minutes
▶️ Starting task: rot13 strategy for hate_unfairness risk category
▶️ Starting task: rot13 strategy for sexual risk category
▶️ Starting task: rot13 strategy for self_harm risk category
▶️ Starting task: tense strategy for violence risk category
▶️ Starting task: tense strategy for hate_unfairness risk category


Scanning:  75%|█████████████████████▊       | 9/12 [13:21<06:48, 136.22s/scan, current=initializing]

Evaluation results saved to "/Users/kendrickkoh/Desktop/University/FYP/Red teaming preparation/azure red team/.scan_Basic-Callback-Scan_20251024_144456/rot13_hate_unfairness_fc715886-f6c6-4ef5-a188-939bbcf2aa89.json".
✅ Completed task 6/12 (50.0%) - rot13/hate_unfairness in 376.0s
   Est. remaining: 13.5 minutes
Evaluation results saved to "/Users/kendrickkoh/Desktop/University/FYP/Red teaming preparation/azure red team/.scan_Basic-Callback-Scan_20251024_144456/rot13_sexual_13e08b2d-4ff3-4573-ada9-f672008f48e6.json".
✅ Completed task 7/12 (58.3%) - rot13/sexual in 376.0s
   Est. remaining: 9.7 minutes
Evaluation results saved to "/Users/kendrickkoh/Desktop/University/FYP/Red teaming preparation/azure red team/.scan_Basic-Callback-Scan_20251024_144456/rot13_self_harm_f7b9f626-04fa-4661-ba89-e9beb9d16eb3.json".
✅ Completed task 8/12 (66.7%) - rot13/self_harm in 376.0s
   Est. remaining: 6.8 minutes
Evaluation results saved to "/Users/kendrickkoh/Desktop/University/FYP/Red teaming prepara

Scanning:  83%|████████████████████████▏    | 10/12 [13:21<01:56, 58.45s/scan, current=initializing]

Evaluation results saved to "/Users/kendrickkoh/Desktop/University/FYP/Red teaming preparation/azure red team/.scan_Basic-Callback-Scan_20251024_144456/tense_hate_unfairness_cae8a135-e507-436e-9052-6b12fd36fd40.json".
✅ Completed task 10/12 (83.3%) - tense/hate_unfairness in 376.3s
   Est. remaining: 2.7 minutes
▶️ Starting task: tense strategy for sexual risk category
▶️ Starting task: tense strategy for self_harm risk category


Scanning:  92%|██████████████████████████▌  | 11/12 [16:32<01:20, 80.85s/scan, current=initializing]

Evaluation results saved to "/Users/kendrickkoh/Desktop/University/FYP/Red teaming preparation/azure red team/.scan_Basic-Callback-Scan_20251024_144456/tense_sexual_885eb94c-8466-4ae9-9754-2c893199da55.json".
✅ Completed task 11/12 (91.7%) - tense/sexual in 190.6s
   Est. remaining: 1.5 minutes


Scanning: 100%|█████████████████████████████| 12/12 [16:32<00:00, 82.70s/scan, current=initializing]


Evaluation results saved to "/Users/kendrickkoh/Desktop/University/FYP/Red teaming preparation/azure red team/.scan_Basic-Callback-Scan_20251024_144456/tense_self_harm_44558c0b-33a9-405e-9ee2-bf3b877a2142.json".
✅ Completed task 12/12 (100.0%) - tense/self_harm in 190.9s
   Est. remaining: 0.0 minutes


Class RedTeamResult: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


Evaluation results saved to "/Users/kendrickkoh/Desktop/University/FYP/Red teaming preparation/azure red team/red_team_output.json".

Evaluation results saved to "/Users/kendrickkoh/Desktop/University/FYP/Red teaming preparation/azure red team/.scan_Basic-Callback-Scan_20251024_144456/final_results.json".

Overall ASR: 18.33%
Attack Success: 11/60 attacks were successful
------------------------------------------------------------------------------------------------------------------------------------
Risk Category        | Baseline ASR   | Easy-Complexity Attacks ASR  | Moderate-Complexity Attacks ASR | Difficult-Complexity Attacks ASR
------------------------------------------------------------------------------------------------------------------------------------
Violence             | 20.0%          | 40.0%                        | 40.0%                           | N/A                           
Hate-unfairness      | 20.0%          | 20.0%                        | 20.0%          

In [8]:
# # Run the red team scan with multiple attack strategies
# advanced_result = await red_team.scan(
#     target=azure_openai_callback,
#     scan_name="Advanced-Callback-Scan",
#     attack_strategies=[
#         AttackStrategy.EASY,  # Group of easy complexity attacks
#         AttackStrategy.MODERATE,  # Group of moderate complexity attacks
#         AttackStrategy.CharacterSpace,  # Add character spaces
#         AttackStrategy.ROT13,  # Use ROT13 encoding
#         AttackStrategy.UnicodeConfusable,  # Use confusable Unicode characters
#         AttackStrategy.CharSwap,  # Swap characters in prompts
#         AttackStrategy.Morse,  # Encode prompts in Morse code
#         AttackStrategy.Leetspeak,  # Use Leetspeak
#         AttackStrategy.Url,  # Use URLs in prompts
#         AttackStrategy.Binary,  # Encode prompts in binary
#         AttackStrategy.Compose([AttackStrategy.Base64, AttackStrategy.ROT13]),  # Use two strategies in one attack
#     ],
#     output_path="Advanced-Callback-Scan.json",
# )